Example usage for org.jsoup.nodes Element text

List of usage examples for org.jsoup.nodes Element text

Introduction

On this page you can find usage examples for the text() method of org.jsoup.nodes.Element.

Prototype

public String text() 

Source Link

Document

Gets the combined text of this element and all its children.

Usage

From source file:org.jtotus.database.NetworkOP.java

/**
 * Fetches a single value for {@code stockName} on {@code date} from the page
 * addressed by {@code buildRequest(date, stockName)}.
 *
 * <p>All {@code td} cells of the page are scanned in document order for the
 * first one whose HTML contains the formatted date. The cell {@code col}
 * positions after it is then read, its decimal comma is replaced by a dot and
 * the result is parsed as a number.
 *
 * @param stockName name of the stock to look up
 * @param date      day whose value is wanted
 * @param col       number of cells to step forward from the date cell
 * @return the parsed value, or {@code null} when the page cannot be fetched,
 *         the date is not found, fewer than {@code col} cells follow the date
 *         cell, or the target cell is not numeric
 */
public BigDecimal fetchData(String stockName, DateTime date, int col) {
    System.out.printf("NetworkOP fetchData(%s,hex:%s, date:%s col:%d)\n", stockName,
            new StockType(stockName).getHexName(), date.toString(), col);

    try {
        URL url = new URL(this.buildRequest(date, stockName));

        // Second argument is the jsoup fetch timeout in milliseconds.
        Document doc = Jsoup.parse(url, 2 * 1000);

        // The pattern does not depend on the cell, so format it once.
        String datePattern = dateFormatter.print(date);

        Iterator<Element> iter = doc.select("td").iterator();
        while (iter.hasNext()) {
            Element elem = iter.next();
            if (!elem.html().contains(datePattern)) {
                continue;
            }

            // Step forward to the requested column, but never past the last
            // cell: an unguarded next() would throw NoSuchElementException.
            for (int i = 0; i < col; i++) {
                if (!iter.hasNext()) {
                    System.out.printf("Failed in :%s\n", "NetworkOP");
                    return null;
                }
                elem = iter.next();
            }

            String fdata = elem.text().replace(',', '.');

            if (debug) {
                System.out.printf("Fetched value from OP bank ->:%s for date:%s\n", fdata, datePattern);
            }

            try {
                return BigDecimal.valueOf(Double.parseDouble(fdata));
            } catch (NumberFormatException ex) {
                // A non-numeric cell is treated like any other lookup failure.
                System.out.printf("Failed in :%s\n", "NetworkOP");
                return null;
            }
        }

    } catch (IOException ex) {
        System.out.printf("Failed in :%s\n", "NetworkOP");
    }

    return null;
}

From source file:org.jtotus.database.NetworkOP.java

/**
 * Fetches one value per date in the range {@code fromDate}..{@code toDate}
 * from the page addressed by {@code buildRequest(fromDate, toDate, stockName)}.
 *
 * <p>For every date produced by the {@code DateIterator}, the page's
 * {@code td} cells are scanned from the start for the first cell whose HTML
 * contains the formatted date; the cell {@code col} positions after it is
 * parsed (decimal comma replaced by a dot) and appended to the result.
 * Dates that are not found, whose target cell lies beyond the last cell, or
 * whose target cell is not numeric contribute no value.
 *
 * @param stockName name of the stock to look up
 * @param fromDate  first date of the period
 * @param toDate    last date of the period
 * @param col       number of cells to step forward from each date cell
 * @return the values found, in date-iteration order; empty when the page
 *         cannot be fetched or nothing matches
 */
private double[] fetchDataPeriod(String stockName, DateTime fromDate, DateTime toDate, int col) {
    List<Double> values = new ArrayList<Double>();

    System.out.printf("NetworkOP fetchData(%s,hex:%s, date:%s-%s col:%d)\n", stockName,
            new StockType(stockName).getHexName(), fromDate.toString(), toDate.toString(), col);

    try {
        URL url = new URL(this.buildRequest(fromDate, toDate, stockName));

        // Second argument is the jsoup fetch timeout in milliseconds.
        Document doc = Jsoup.parse(url, 2 * 1000);

        Elements elems = doc.select("td");

        DateIterator dateIter = new DateIterator(fromDate, toDate);
        while (dateIter.hasNext()) {
            String datePattern = dateFormatter.print(dateIter.nextInCalendar());

            // Every date restarts the scan from the first cell.
            Iterator<Element> iter = elems.iterator();
            while (iter.hasNext()) {
                Element elem = iter.next();
                if (!elem.html().contains(datePattern)) {
                    continue;
                }

                // Step forward to the requested column without running off
                // the end; an unguarded next() would throw
                // NoSuchElementException and abort the whole period.
                int stepped = 0;
                while (stepped < col && iter.hasNext()) {
                    elem = iter.next();
                    stepped++;
                }
                if (stepped < col) {
                    System.out.printf("Failed in :%s\n", "NetworkOP");
                    break;
                }

                String fdata = elem.text().replace(',', '.');

                if (debug) {
                    System.out.printf("Fetched value from OP bank ->:%s for date:%s\n", fdata, datePattern);
                }

                try {
                    values.add(Double.valueOf(fdata));
                } catch (NumberFormatException ex) {
                    // Skip just this date instead of losing the whole period.
                    System.out.printf("Failed in :%s\n", "NetworkOP");
                }
                break;
            }
        }

    } catch (IOException ex) {
        System.out.printf("Failed in :%s\n", "NetworkOP");
    }

    return ArrayUtils.toPrimitive(values.toArray(new Double[0]));
}

From source file:org.jtotus.network.NordnetConnect.java

/**
 * Builds a {@code StockTick} for {@code stockName} from a broker info page.
 *
 * <p>The {@code td} cells of the rows selected by {@code tr[class=first]} are
 * read by position. Exactly 15 cells are required; any other count yields
 * {@code null}. Cell 3 must read "OMX Helsinki" (case-insensitive), otherwise
 * the page is rejected.
 *
 * @param infoPage  HTML of the info page
 * @param stockName name stored on the returned tick
 * @return the populated tick, or {@code null} when the cell count is not 15
 *         or the market cell does not match
 */
private StockTick parseAuthenticatedStream(String infoPage, String stockName) {
    Elements firstRows = Jsoup.parse(infoPage).select("tr[class=first]");
    Elements cells = Jsoup.parse(firstRows.html()).select("td");

    // Any cell count other than 15 is rejected.
    if (cells.size() != 15) {
        return null;
    }

    StockTick tick = new StockTick();
    tick.setStockName(stockName);

    for (int count = 0; count < cells.size(); count++) {
        Element cell = cells.get(count);

        log.info("Element value (" + count + "):" + cell.text());

        if (count == 3) {
            // market name
            if (!cell.text().equalsIgnoreCase("OMX Helsinki")) {
                System.err.printf("Data corruption in broker site? :%s for: %s\n", cell.text(), stockName);
                return null;
            }
        } else if (count == 4) {
            // latest price
            tick.setLatestPrice(Double.parseDouble(cell.text().replace(",", ".").trim()));
        } else if (count == 5) {
            // latest buy
            tick.setLatestBuy(Double.parseDouble(cell.text().replace(",", ".").trim()));
        } else if (count == 6) {
            // latest sell
            tick.setLatestSell(Double.parseDouble(cell.text().replace(",", ".").trim()));
        } else if (count == 7) {
            // latest highest
            tick.setLatestHighest(Double.parseDouble(cell.text().replace(",", ".").trim()));
        } else if (count == 8) {
            // latest lowest
            tick.setLatestLowest(Double.parseDouble(cell.text().replace(",", ".").trim()));
        } else if (count == 11) {
            // volume (spaces removed before parsing)
            tick.setVolume(Double.parseDouble(cell.text().replace(" ", "").trim()));
        } else if (count == 12) {
            // trades sum (spaces removed before parsing)
            tick.setTradesSum(Double.parseDouble(cell.text().replace(" ", "").trim()));
        } else if (count == 14) {
            // time
            tick.setTime(cell.text().trim());
        } else {
            // TODO: currency and time
            log.info("Not matched(" + count + ") = " + cell.text());
        }
    }
    log.info("StockTick:" + tick.toString());

    return tick;
}

From source file:org.jtotus.network.NordnetConnect.java

/**
 * Builds a {@code StockTick} for {@code stockName} from a broker info page.
 *
 * <p>The {@code td} cells of the rows selected by {@code tr[class=first]} are
 * read by position. Exactly 13 cells are required; any other count yields
 * {@code null}. Cell 1 must read "OMX Helsinki" (case-insensitive), otherwise
 * the page is rejected.
 *
 * @param infoPage  HTML of the info page
 * @param stockName name stored on the returned tick
 * @return the populated tick, or {@code null} when the cell count is not 13
 *         or the market cell does not match
 */
private StockTick parseNonAuthenticatedStream(String infoPage, String stockName) {
    Elements firstRows = Jsoup.parse(infoPage).select("tr[class=first]");
    Elements cells = Jsoup.parse(firstRows.html()).select("td");

    // Any cell count other than 13 is rejected.
    if (cells.size() != 13) {
        return null;
    }

    StockTick tick = new StockTick();
    tick.setStockName(stockName);

    for (int count = 0; count < cells.size(); count++) {
        Element cell = cells.get(count);

        System.out.printf("Non-Auth Element value (%d):%s for:%s\n", count, cell.text(), stockName);

        if (count == 1) {
            // market name
            if (!cell.text().equalsIgnoreCase("OMX Helsinki")) {
                System.err.printf("Data corruption in broker site? :%s for: %s\n", cell.text(), stockName);
                return null;
            }
        } else if (count == 2) {
            // latest price
            tick.setLatestPrice(Double.parseDouble(cell.text().replace(",", ".").trim()));
        } else if (count == 3) {
            // latest buy
            tick.setLatestBuy(Double.parseDouble(cell.text().replace(",", ".").trim()));
        } else if (count == 4) {
            // latest sell
            tick.setLatestSell(Double.parseDouble(cell.text().replace(",", ".").trim()));
        } else if (count == 5) {
            // latest highest
            tick.setLatestHighest(Double.parseDouble(cell.text().replace(",", ".").trim()));
        } else if (count == 6) {
            // latest lowest
            tick.setLatestLowest(Double.parseDouble(cell.text().replace(",", ".").trim()));
        } else if (count == 9) {
            // volume (spaces removed before parsing)
            tick.setVolume(Double.parseDouble(cell.text().replace(" ", "").trim()));
        } else if (count == 10) {
            // trades sum (spaces removed before parsing)
            tick.setTradesSum(Double.parseDouble(cell.text().replace(" ", "").trim()));
        } else if (count == 12) {
            // time
            tick.setTime(cell.text().trim());
        } else {
            // TODO: currency and time
            System.out.printf("Not matched(%d) = %s \n", count, cell.text());
        }
    }
    System.out.printf("StockTick:%s\n", tick.toString());

    return tick;
}

From source file:org.lockss.extractor.JsoupTagExtractor.java

/**
 * extract the values for the as defined by the selectors and store them in
 * article. These can be selectors or they can be css/jquery selection strings
 * metadata/*from   w  w  w  .  ja v a 2  s.c o m*/
 * @param doc the jsoup parsed doc
 * @param articleMeta the ArticleMetadata in which to store the selector/value(s)
 */
void extractSelectors(Document doc, ArticleMetadata articleMeta) {

    // if we don't have any selectors there is nothing to do, so we return
    if (m_selectors == null || m_selectors.isEmpty())
        return;

    for (String selector : m_selectors) {
        String val;
        Elements elements = doc.select(selector);
        for (Element element : elements) {
            if (element.hasText()) {
                if (m_isHtml) {
                    val = processHtml(selector, element.text());
                } else {
                    val = processXml(selector, element.text());
                }
                if (theLog.isDebug3())
                    theLog.debug3("Add: " + selector + " = " + val);
                articleMeta.putRaw(selector, val);
            }
        }
    }
}

From source file:org.lockss.extractor.JsoupXmlTagExtractor.java

/**
 * Extracts the text of every element matched by the configured tags and
 * stores it in the article metadata, keyed by the tag string.
 *
 * <p>Each matched element that has text is passed through
 * {@code processXml} before being stored.
 *
 * @param doc the jsoup parsed doc
 * @param articleMeta the ArticleMetadata in which to store the tag/value(s)
 */
void extractTags(Document doc, ArticleMetadata articleMeta) {
    // Nothing configured means nothing to extract.
    if (m_tags == null || m_tags.isEmpty()) {
        return;
    }

    for (String tag : m_tags) {
        for (Element match : doc.select(tag)) {
            // Elements without text contribute no value.
            if (!match.hasText()) {
                continue;
            }
            articleMeta.putRaw(tag, processXml(tag, match.text()));
        }
    }
}

From source file:org.loklak.api.search.MeetupsCrawlerService.java

/**
 * Crawls a Meetup group page and returns its metadata as a one-element JSON
 * array wrapped in a {@code SusiThought}.
 *
 * <p>The result object carries the {@code og:*} meta values (name, type,
 * locality, country, latitude, longitude, image), the text of the
 * {@code groupDesc} element, the link texts inside {@code topic-box-2012}
 * and the paragraphs inside {@code recentMeetups}, grouped three at a time
 * (date/time, attendance, information) and numbered from 1.
 *
 * <p>Missing page elements produce empty values instead of a
 * {@code NullPointerException}, any number of topics or meetups is accepted,
 * and the last recent meetup is no longer dropped.
 *
 * @param url address of the Meetup group page
 * @return a thought whose data is a JSON array holding the single result
 *         object, or an empty JSON array when the page cannot be fetched
 */
public static SusiThought crawlMeetups(String url) {
    JSONArray meetupsCrawlerResultArray = new JSONArray();
    SusiThought json = new SusiThought();

    Document meetupHTML;
    try {
        // NOTE(review): the user agent "Mozilla)" looks like a typo; kept
        // unchanged because the site may be sensitive to it — confirm.
        meetupHTML = Jsoup.connect(url).userAgent("Mozilla)").get();
    } catch (Exception e) {
        // Without a document there is nothing to extract; previously the
        // code continued and failed with a NullPointerException.
        e.printStackTrace();
        json.setData(meetupsCrawlerResultArray);
        return json;
    }

    JSONObject result = new JSONObject();
    result.put("group_name", ogContent(meetupHTML, "og:title"));
    result.put("meetup_type", ogContent(meetupHTML, "og:type"));

    // getElementById returns null when the id is absent; fall back to the
    // same empty string that a missing meta attribute produces.
    Element descriptionBox = meetupHTML.getElementById("groupDesc");
    result.put("group_description", descriptionBox == null ? "" : descriptionBox.text());

    result.put("group_locality", ogContent(meetupHTML, "og:locality"));
    result.put("group_country_code", ogContent(meetupHTML, "og:country-name"));
    result.put("group_latitude", ogContent(meetupHTML, "og:latitude"));
    result.put("group_longitude", ogContent(meetupHTML, "og:longitude"));
    result.put("group_imageLink", ogContent(meetupHTML, "og:image"));

    // Topics go straight into the JSON array, so there is no fixed-size
    // buffer to overflow.
    JSONArray groupTopics = new JSONArray();
    Element topicBox = meetupHTML.getElementById("topic-box-2012");
    if (topicBox != null) {
        for (Element topicLink : topicBox.getElementsByTag("a")) {
            groupTopics.put(topicLink.text());
        }
    }
    result.put("group_topics", groupTopics);

    // Paragraphs come in groups of three: date/time, attendance, information.
    JSONArray recentMeetups = new JSONArray();
    Element recentMeetupsBox = meetupHTML.getElementById("recentMeetups");
    if (recentMeetupsBox != null) {
        Elements paragraphs = recentMeetupsBox.getElementsByTag("p");
        int total = paragraphs.size();
        int meetupNumber = 0;
        for (int start = 0; start < total; start += 3) {
            meetupNumber++;
            JSONObject obj = new JSONObject();
            obj.put("recent_meetup_number", meetupNumber);
            obj.put("date_time", paragraphs.get(start).text());
            // A trailing incomplete group only carries the fields it has.
            if (start + 1 < total) {
                obj.put("attendance", paragraphs.get(start + 1).text());
            }
            if (start + 2 < total) {
                obj.put("information", paragraphs.get(start + 2).text());
            }
            recentMeetups.put(obj);
        }
    }
    result.put("recent_meetups", recentMeetups);

    meetupsCrawlerResultArray.put(result);
    json.setData(meetupsCrawlerResultArray);
    return json;
}

/** Returns the {@code content} attribute of the elements whose {@code property} attribute equals the given value. */
private static String ogContent(Document page, String property) {
    return page.getElementsByAttributeValue("property", property).attr("content");
}

From source file:org.loklak.api.search.WordpressCrawlerService.java

/**
 * Crawls a WordPress blog page and returns one JSON object per
 * {@code <article>} element, wrapped in a {@code SusiThought}.
 *
 * <p>Each object carries the blog URL plus the text of the article's
 * {@code entry-title}, {@code posted-on}, {@code byline} and
 * {@code entry-content} elements. When several elements share a class the
 * last one wins; when none exists the value passed to {@code put} is
 * {@code null}, exactly as before.
 *
 * <p>Any number of articles is accepted, and a failed fetch yields an empty
 * result instead of a {@code NullPointerException}.
 *
 * @param blogURL address of the blog page
 * @return a thought whose data is the JSON array of blog posts; the array is
 *         empty when the page cannot be fetched or has no articles
 */
public static SusiThought crawlWordpress(String blogURL) {
    JSONArray blog = new JSONArray();
    SusiThought json = new SusiThought();

    Document blogHTML;
    try {
        blogHTML = Jsoup.connect(blogURL).get();
    } catch (IOException e) {
        // Without a document there is nothing to extract; previously the
        // code continued and failed with a NullPointerException.
        e.printStackTrace();
        json.setData(blog);
        return json;
    }

    // Posts are written straight into the JSON array, so there is no
    // fixed-size buffer to overflow.
    for (Element article : blogHTML.getElementsByTag("article")) {
        JSONObject blogpost = new JSONObject();
        blogpost.put("blog_url", blogURL);
        blogpost.put("title", lastTextByClass(article, "entry-title"));
        blogpost.put("posted_on", lastTextByClass(article, "posted-on"));
        blogpost.put("author", lastTextByClass(article, "byline"));
        blogpost.put("content", lastTextByClass(article, "entry-content"));
        blog.put(blogpost);
    }

    json.setData(blog);
    return json;
}

/** Returns the text of the last element with the given class inside {@code scope}, or {@code null} when there is none. */
private static String lastTextByClass(Element scope, String className) {
    String text = null;
    for (Element match : scope.getElementsByClass(className)) {
        text = match.text();
    }
    return text;
}

From source file:org.neo4j.browser.CannedCypherExecutionTest.java

/**
 * Walks the "browser" resource directory, extracts every
 * {@code pre.runnable} block (excluding {@code .standalone-example}) from
 * the {@code .html} files found there, and runs each extracted statement
 * against an impermanent test database.
 *
 * <p>Statements starting with ":" are skipped. Statements for which
 * {@code shouldExplain} returns true are first run through
 * {@code prependExplain} and their notifications are asserted on; every
 * non-skipped statement is then executed as-is. The test finally requires at
 * least one successful execution and prints both counters.
 */
@Test
public void shouldBeAbleToExecuteAllTheCannedCypherQueriesContainedInStaticHtmlFiles() throws Exception {
    URL resourceLoc = getClass().getClassLoader().getResource("browser");
    assertNotNull(resourceLoc);

    // Counters are atomic because they are updated from inside the anonymous visitor.
    final AtomicInteger explainCount = new AtomicInteger(0);
    final AtomicInteger executionCount = new AtomicInteger(0);

    Files.walkFileTree(Paths.get(resourceLoc.toURI()), new SimpleFileVisitor<Path>() {
        @Override
        public FileVisitResult visitFile(Path file, BasicFileAttributes attributes) throws IOException {
            // A fresh database is created for every visited file, HTML or not.
            // NOTE(review): no shutdown/close of this database is visible in this
            // method — confirm whether it needs to be released.
            final GraphDatabaseService database = new TestGraphDatabaseFactory().newImpermanentDatabase();

            String fileName = file.getFileName().toString();
            if (fileName.endsWith(".html")) {
                String content = FileUtils.readTextFile(file.toFile(), Charsets.UTF_8);
                // Runnable snippets, minus those marked as standalone examples.
                Elements cypherElements = Jsoup.parse(content).select("pre.runnable")
                        .not(".standalone-example");
                for (Element cypherElement : cypherElements) {
                    String statement = replaceAngularExpressions(cypherElement.text());

                    // Statements starting with ":" are neither explained nor executed.
                    if (!statement.startsWith(":")) {
                        if (shouldExplain(statement)) {
                            try (Transaction transaction = database.beginTx()) {
                                Iterable<Notification> actual = database.execute(prependExplain(statement))
                                        .getNotifications();
                                // True when the snippet's parent contains an element with class "warn".
                                boolean skipKnownInefficientCypher = !cypherElement.parent().select(".warn")
                                        .isEmpty();
                                if (skipKnownInefficientCypher) {

                                    // Copy the notifications, filter the copy with the predicate
                                    // below, and require the filtered copy to be empty.
                                    List<Notification> targetCollection = new ArrayList<Notification>();
                                    CollectionUtils.addAll(targetCollection, actual);
                                    CollectionUtils.filter(targetCollection,
                                            new org.apache.commons.collections4.Predicate<Notification>()

                                            {
                                                // NOTE(review): values().toString() looks like the
                                                // toString() of an array rather than the cartesian
                                                // product description, and the assertion message
                                                // below suggests the predicate may have been meant
                                                // to be negated — confirm the intended check.
                                                @Override
                                                public boolean evaluate(Notification notification) {
                                                    return notification.getDescription()
                                                            .contains(NotificationCode.CARTESIAN_PRODUCT
                                                                    .values().toString());
                                                }
                                            });

                                    assertThat(
                                            format("Query [%s] should only produce cartesian product "
                                                    + "notifications. [%s]", statement, fileName),
                                            targetCollection, empty());

                                    explainCount.incrementAndGet();
                                    transaction.success();

                                } else {
                                    // Snippets without a warning marker must produce no notifications at all.
                                    assertThat(format("Query [%s] should produce no notifications. [%s]",
                                            statement, fileName), actual, is(emptyIterable()));
                                    explainCount.incrementAndGet();
                                    transaction.success();
                                }
                            } catch (QueryExecutionException e) {
                                // Rethrown as AssertionError so the failure names the query and file.
                                throw new AssertionError(
                                        format("Failed to explain query [%s] in file [%s]", statement, file),
                                        e);
                            }
                        }
                        // Every non-skipped statement is also executed for real, in its own transaction.
                        try (Transaction transaction = database.beginTx()) {
                            database.execute(statement);
                            executionCount.incrementAndGet();
                            transaction.success();
                        } catch (QueryExecutionException e) {
                            throw new AssertionError(
                                    format("Failed to execute query [%s] in file [%s]", statement, file), e);
                        }
                    }
                }
            }
            return FileVisitResult.CONTINUE;
        }
    });

    // Guards against the walk silently finding nothing to run.
    assertTrue("Static files should contain at least one valid cypher statement",
            executionCount.intValue() >= 1);
    System.out.printf("Explained %s cypher statements extracted from HTML files, with no notifications.%n",
            explainCount);
    System.out.printf("Executed %s cypher statements extracted from HTML files, with no errors.%n",
            executionCount);
}

From source file:org.norvelle.addressdiscoverer.parse.structured.StructuredPageEmailContactLink.java

/**
 * Looks for an email address first in the element's HTML (so that element
 * attributes are searched too) and, if none is found there, in its plain
 * text (in case the HTML has been scrambled to obfuscate the address).
 *
 * @param element the element to search for an email contact link
 * @throws DoesNotContainContactLinkException if neither the HTML nor the
 *         plain text yields a link
 * @throws MultipleContactLinksOfSameTypeFoundException propagated from
 *         {@code findLinkInString}
 */
public StructuredPageEmailContactLink(Element element)
        throws DoesNotContainContactLinkException, MultipleContactLinksOfSameTypeFoundException {
    super(element);
    String markup = element.html();
    String found;
    try {
        found = this.findLinkInString(markup);
    } catch (DoesNotContainContactLinkException notInMarkup) {
        // Fall back to the rendered text when the markup holds no link.
        found = this.findLinkInString(element.text());
    }
    this.address = found;
}