Example usage for org.jsoup.nodes Element getElementsByClass

List of usage examples for org.jsoup.nodes Element getElementsByClass

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.getElementsByClass.

Prototype

public Elements getElementsByClass(String className) 

Source Link

Documentation

Find elements that have this class, including or under this element.

Usage

From source file:Main.java

/**
 * Downloads a page and, for every table row that contains at least one element of each
 * of the classes {@code cls1}, {@code cls2} and {@code cls3}, prints the text of the
 * first element of each class on its own line.
 *
 * @param args unused
 * @throws IOException if the page cannot be fetched
 */
public static void main(String[] args) throws IOException {
    Document page = Jsoup.connect("http://www.your server.com/").get();

    for (Element tr : page.select("tr")) {
        Elements first = tr.getElementsByClass("cls1");
        Elements second = tr.getElementsByClass("cls2");
        Elements third = tr.getElementsByClass("cls3");

        // Rows missing any one of the three classes are skipped entirely.
        boolean incomplete = first.isEmpty() || second.isEmpty() || third.isEmpty();
        if (incomplete) {
            continue;
        }

        System.out.println(first.first().text());
        System.out.println(second.first().text());
        System.out.println(third.first().text());
    }
}

From source file:controllers.BBProxy.java

/**
 * Runs a search against {@code http://www.bloomberg.com/search} and returns the scraped
 * results as a JSON array.
 *
 * <p>Each element with class {@code search-result} becomes one object with the keys
 * {@code image}, {@code title}, {@code content}, {@code date} and {@code url}, in that
 * order. The content is truncated at the first occurrence of
 * {@code " (Source: Bloomberg"} when that marker is present.
 *
 * @param query the search term; when null or empty, a promise carrying an explanatory
 *              JSON message is returned and no remote request is made
 * @return a promise of the JSON result
 */
public static F.Promise<Result> index(String query) {

    if (StringUtils.isEmpty(query)) {
        // Return here: without it the search below would still run with an empty
        // query and this response would be discarded.
        return F.Promise.promise(new F.Function0<Result>() {
            @Override
            public Result apply() throws Throwable {
                return ok(Json.toJson("Query parameter (q) not provided "));
            }
        });
    }

    F.Promise<WSResponse> wsResponsePromise = WS.url("http://www.bloomberg.com/search")
            .setQueryParameter("query", query).get();

    return wsResponsePromise.map(new F.Function<WSResponse, Result>() {
        @Override
        public Result apply(WSResponse wsResponse) throws Throwable {

            String body = wsResponse.getBody();

            List<Map<String, String>> ret = new ArrayList<Map<String, String>>();

            try {
                org.jsoup.nodes.Document doc = Jsoup.parse(body);
                Elements items = doc.getElementsByClass("search-result");

                for (Element item : items) {
                    // LinkedHashMap keeps the keys in insertion order in the JSON output.
                    Map<String, String> keyValue = new LinkedHashMap<String, String>();

                    keyValue.put("image",
                            item.getElementsByClass("search-result-story__thumbnail__image").attr("src"));
                    keyValue.put("title", item.getElementsByClass("search-result-story__headline").text());

                    // Look the story body up once; drop the trailing source attribution if present.
                    String storyBody = item.getElementsByClass("search-result-story__body").text();
                    int sourceMarker = storyBody.indexOf(" (Source: Bloomberg");
                    keyValue.put("content",
                            sourceMarker == -1 ? storyBody : storyBody.substring(0, sourceMarker));

                    keyValue.put("date", item.getElementsByClass("published-at").text());
                    keyValue.put("url", "www.bloomberg.com/"
                            + item.getElementsByClass("search-result-story__thumbnail__link").attr("href"));

                    ret.add(keyValue);
                }

            } catch (DOMException e) {
                // Best effort: whatever was collected before the failure is still returned.
                e.printStackTrace();
            }

            return ok(Json.toJson(ret));
        }
    });
}

From source file:com.kantenkugel.discordbot.jdocparser.JDocParser.java

/**
 * Returns the one element with the given class found in or under {@code root}.
 *
 * @param root      element to search
 * @param className class name to look for
 * @return the single matching element
 * @throws RuntimeException if the number of matches is not exactly one; the message
 *                          contains the match count, the class name, the root's tag and
 *                          class, and the root's inner HTML
 */
private static Element getSingleElementByClass(Element root, String className) {
    Elements matches = root.getElementsByClass(className);
    int count = matches.size();
    if (count == 1) {
        return matches.first();
    }

    StringBuilder message = new StringBuilder();
    message.append("Found ").append(count)
            .append(" elements with class ").append(className)
            .append(" inside of ").append(root.tagName())
            .append("-").append(root.className())
            .append(root.html());
    throw new RuntimeException(message.toString());
}

From source file:app.sunstreak.yourpisd.net.Parser.java

/**
 * Parses the average of each term from GradeSummary.aspx.
 *
 * <p>NOTICE: Does not work for second semester classes in which the second semester
 * schedule is different from the first semester schedule.
 *
 * <p>Every row of the first {@code tbody} inside the first {@code reportTable} element
 * yields an 11-element array: the class id followed by the values of the first ten
 * {@code td} cells. A cell value is {@code -2} when the cell's class attribute is exactly
 * {@code disabledCell}, {@code -1} when the cell text is empty, and the parsed integer
 * otherwise.
 *
 * @param doc the Jsoup element of GradeSummary.aspx
 * @param classList classList as returned by Init.aspx (not read by this method)
 * @return <pre>
 * [
 *       [classId, avg0, avg1, ...],
 *       [classId, avg0, avg1, ...],
 * ]
 * </pre>
 */
public static int[][] gradeSummary(Element doc, JSONArray classList) {

    Element tableBody = doc.getElementsByClass("reportTable").get(0).getElementsByTag("tbody").get(0);

    List<int[]> perClass = new ArrayList<int[]>();

    for (Element row : tableBody.getElementsByTag("tr")) {
        int[] averages = new int[11];
        Arrays.fill(averages, -3);

        Elements cells = row.getElementsByTag("td");
        averages[0] = getClassId(row);

        for (int slot = 1; slot <= 10; slot++) {
            Element cell = cells.get(slot - 1);

            // -2 marks a disabled class; otherwise use the cell's text.
            String raw = cell.attr("class").equals("disabledCell") ? "-2" : cell.text();

            if (raw.isEmpty()) {
                averages[slot] = -1;
            } else {
                averages[slot] = Integer.parseInt(raw);
            }
        }
        perClass.add(averages);
    }

    return perClass.toArray(new int[0][]);
}

From source file:ie.nuim.cs.dri.metadata.WebSearch.java

/**
 *
 * @param xmlString/*from   w  ww  .j a va 2s .c  om*/
 * @param title
 * @return
 * @throws Exception
 */
public static ROS extractCiteSeerMetadata(String xmlString, String title) throws Exception {
    //System.out.println("CiteSeer search returned:\n"+xmlString);
    Document doc = Jsoup.parse(xmlString);
    ROS ros = new ROS();
    String articleTitle = "";
    int pubYear = 0;

    Elements resultElements = doc.getElementsByClass("result");

    for (Element result : resultElements) {
        Elements titleElement = result.getElementsByClass("doc_details");
        System.out.println(titleElement.text() + "\t" + title);
        if (!titleElement.text().equalsIgnoreCase(title)) {
            break;
        } else {
            ros.setArticleTitle(title);
            Elements authorElement = result.getElementsByClass("pubinfo");
            //authors=authorElement.text();
            Elements yearElement = result.getElementsByClass("pubyear");
            String yearStr = yearElement.text().replace(", ", "");
            int year = (yearStr.length() > 0 ? Integer.parseInt(yearStr) : 0);
            System.out.println("year:" + yearElement.text().replace(", ", ""));
            ros.setYear(year);
            Elements citeElement = result.getElementsByClass("citation");
            String[] citedBy = citeElement.text().split(" ");

            int citeby = (citedBy.length > 2 ? Integer.parseInt(citedBy[2]) : -1);
            ros.setCitedByCount(citeby);

            Elements publicationElement = result.getElementsByClass("pubvenue");
            String pub = publicationElement.text().replace("- ", "").toLowerCase();
            if (pub.contains("journal")) {
                ros.setPublicationType("Journal");
            } else if (pub.contains("conference") || pub.contains("conf.") || pub.contains("proc.")) {
                ros.setPublicationType("Conference");
            } else {
                ros.setPublicationType("");
            }
            ros.setPublicationName(pub);

        }

    }
    // }

    return ros;

}

From source file:fr.arlefebvre.pronostics.controller.EuroMatchListController.java

/**
 * Returns the match list scraped from the lequipe.fr Euro 2016 calendar page, mapped to
 * {@code /euro2016/matches}.
 *
 * <p>A non-empty cached list is returned as is. Otherwise the page is fetched and each
 * child of the {@code tbody} inside every {@code mainDate} element becomes one
 * {@code Match}. A non-empty result replaces the cache, so an earlier failed fetch does
 * not prevent a later successful one from being cached.
 *
 * @return the matches; empty when the page could not be fetched
 */
@RequestMapping("/euro2016/matches")
public List<Match> matches() {
    if (pseudoCache != null && !pseudoCache.isEmpty()) {
        return pseudoCache;
    }
    ArrayList<Match> result = new ArrayList<Match>();
    String uri = "http://www.lequipe.fr/Football/Euro/Saison-2016/calendrier-resultats.html";

    try {
        // Connect to the site and load the HTML document.
        Document doc = Jsoup.connect(uri).get();

        for (Element dayBlock : doc.getElementsByClass("mainDate")) {
            Element title = dayBlock.getElementsByClass("title").first();
            Element tbody = dayBlock.getElementsByTag("tbody").first();
            // first() returns null when nothing matched; skip blocks without a date
            // heading or a table body instead of failing the whole request.
            if (title == null || tbody == null) {
                continue;
            }
            String date = title.text();

            for (Element matchElement : tbody.children()) {
                String groupe = matchElement.getElementsByClass("date").first().text();
                String home = matchElement.getElementsByClass("domicile").first().text();
                String away = matchElement.getElementsByClass("exterieur").first().text();

                Match m = new Match();
                m.setDate(date);
                m.setHomeTeamId(home);
                m.setAwayTeamId(away);
                m.setGroup(groupe);
                result.add(m);
            }
        }

    } catch (IOException e) {
        e.printStackTrace();
    }

    // Cache only useful results. Testing for a null cache here would leave an empty
    // list from a failed fetch in place forever, since it is non-null.
    if (!result.isEmpty()) {
        pseudoCache = result;
    }
    return result;
}

From source file:fr.arlefebvre.pronostics.controller.UEFATeamsController.java

/**
 * Returns the UEFA national teams scraped from the FIFA world-ranking page, sorted by
 * name and mapped to {@code /uefa/teams}.
 *
 * <p>A non-empty cached list is returned as is. Otherwise the page is fetched and each
 * child of the {@code tbody} inside every element with class {@code table} becomes one
 * {@code Team}. A non-empty result replaces the cache, so an earlier failed fetch does
 * not prevent a later successful one from being cached.
 *
 * @return the teams ordered by name; empty when the page could not be fetched
 */
@RequestMapping("/uefa/teams")
public List<Team> teams() {
    if (pseudoCache != null && !pseudoCache.isEmpty()) {
        return pseudoCache;
    }
    ArrayList<Team> result = new ArrayList<Team>();
    String uri = "http://fr.fifa.com/fifa-world-ranking/ranking-table/men/uefa.html";

    try {
        // Connect to the site and load the HTML document.
        Document doc = Jsoup.connect(uri).get();

        for (Element element : doc.getElementsByClass("table")) {
            Element tbody = element.getElementsByTag("tbody").first();
            // The class "table" can match elements that hold no table body.
            if (tbody == null) {
                continue;
            }
            for (Element child : tbody.children()) {
                Element teamNameElement = child.getElementsByClass("tbl-teamname").first();
                String name = teamNameElement.text();
                String countryCode = child.getElementsByClass("tbl-countrycode").first().text();
                String imgUrl = teamNameElement.select("img").first().absUrl("src");

                Team team = new Team();
                team.setName(name);
                team.setCountryCode(countryCode);
                team.setImgUrl(imgUrl);
                team.setNationalTeam(true);
                result.add(team);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }

    result.sort((t1, t2) -> t1.getName().compareTo(t2.getName()));

    // Cache only useful results. Testing for a null cache here would leave an empty
    // list from a failed fetch in place forever, since it is non-null.
    if (!result.isEmpty()) {
        pseudoCache = result;
    }
    return result;
}

From source file:jobhunter.cb.Client.java

/**
 * Downloads the job posting at {@code url} and converts it into a {@code Job}.
 *
 * <p>Progress is reported through {@code update} in three steps: connecting, parsing,
 * done. The external id is taken from the {@code job_did} query parameter of the URL
 * when present. The description is the concatenation of the unescaped HTML of the job
 * description panel, the requirements section body and the snapshot section body.
 *
 * @return the populated job
 * @throws IOException        if the page cannot be fetched
 * @throws URISyntaxException if {@code url} is not a valid URI
 */
public Job execute() throws IOException, URISyntaxException {
    l.debug("Connecting to {}", url);

    update("Connecting", 1L);
    final Document page = Jsoup.connect(url).get();

    update("Parsing HTML", 2L);
    final Job job = Job.of();
    job.setPortal(CareerBuilderPlugin.portal);
    job.setLink(url);

    // Use the first job_did query parameter (case-insensitive) as the external id.
    URLEncodedUtils.parse(new URI(url), "UTF-8")
            .stream()
            .filter(pair -> pair.getName().equalsIgnoreCase("job_did"))
            .findFirst()
            .ifPresent(pair -> job.setExtId(pair.getValue()));

    job.setPosition(page.getElementById("job-title").text());
    job.setAddress(page.getElementById("CBBody_Location").text());
    job.getCompany().setName(page.getElementById("CBBody_CompanyName").text());

    String overview = StringEscapeUtils.unescapeHtml4(page.getElementById("pnlJobDescription").html());

    Element requirementsSection = page.getElementById("job-requirements");
    String requirements = StringEscapeUtils
            .unescapeHtml4(requirementsSection.getElementsByClass("section-body").html());

    Element snapshotSection = page.getElementById("job-snapshot-section");
    String snapshot = StringEscapeUtils
            .unescapeHtml4(snapshotSection.getElementsByClass("section-body").html());

    job.setDescription(overview + requirements + snapshot);

    update("Done", 3L);
    return job;
}

From source file:mx.clickfactura.util.TipoCambioUtil.java

/**
 * Looks up the exchange rate published on dof.gob.mx (indicator 158) for a single day.
 *
 * <p>The date is validated, the indicator detail page is requested with that day as both
 * start and end date, and the first {@code table} whose {@code txt}-class cells carry a
 * {@code height} attribute and whose text splits into exactly six space-separated tokens
 * provides the rate: the sixth token, if it has the form {@code dd.dddddd}.
 *
 * @param fecha the day to query, formatted {@code yyyy-MM-dd}; surrounding whitespace is
 *              ignored
 * @return the exchange rate exactly as published
 * @throws CustomBadRequestException if {@code fecha} is not a real calendar date in
 *                                   {@code yyyy-MM-dd} format
 * @throws CustomNotFoundException   if the page holds no rate for that day
 * @throws Exception                 on network or I/O failure
 */
public String getTipoCambio(String fecha) throws CustomBadRequestException, CustomNotFoundException, Exception {

    final String formatoInvalido = "Fecha invalida, el formato debe ser: yyyy-MM-dd";

    // Validate and parse the same trimmed value so the two steps cannot disagree.
    final String fechaLimpia = fecha.trim();

    Pattern formatoFecha = Pattern.compile("^\\d{4}\\-\\d{2}\\-\\d{2}$");
    if (!formatoFecha.matcher(fechaLimpia).matches()) {
        throw new CustomBadRequestException(formatoInvalido);
    }

    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
    // Strict parsing: a lenient parser would silently roll 2020-02-31 over to March and
    // return the rate of a different day.
    sdf.setLenient(false);

    Calendar cal = new GregorianCalendar();
    try {
        cal.setTime(sdf.parse(fechaLimpia));
    } catch (java.text.ParseException ex) {
        throw new CustomBadRequestException(formatoInvalido);
    }

    int diaNum = cal.get(Calendar.DATE);
    int mesNum = cal.get(Calendar.MONTH) + 1; // Calendar months are zero-based
    String dia = (diaNum < 10 ? "0" : "") + diaNum;
    String mes = (mesNum < 10 ? "0" : "") + mesNum;
    String anio = cal.get(Calendar.YEAR) + "";

    // dd/MM/yyyy with the slashes already percent-encoded for the query string.
    String fechaInicial = dia + "%2F" + mes + "%2F" + anio;

    HttpGet get = new HttpGet("http://www.dof.gob.mx/indicadores_detalle.php?cod_tipo_indicador=158&dfecha="
            + fechaInicial + "&hfecha=" + fechaInicial);

    CookieStore cookies = new BasicCookieStore();
    HttpContext httpContext = new BasicHttpContext();
    httpContext.setAttribute(HttpClientContext.COOKIE_STORE, cookies);

    // try-with-resources releases the stream, response and client even when the request
    // or the read fails.
    String body;
    try (CloseableHttpClient client = HttpClients.createDefault();
            CloseableHttpResponse response = client.execute(get, httpContext);
            InputStream in = response.getEntity().getContent()) {
        body = IOUtils.toString(in, "UTF-8");
    }

    // The two-argument Jsoup.parse(String, String) takes a base URI, not a charset; the
    // body is already a decoded String, so the single-argument form is the right one.
    Document doc = Jsoup.parse(body).normalise();

    Pattern formatoCambio = Pattern.compile("^\\d{2}\\.\\d{6}$");
    String tipoCambio = null;

    for (Element tabla : doc.select("table")) {
        Elements celdas = tabla.getElementsByClass("txt");
        if (!celdas.hasAttr("height")) {
            continue;
        }

        String[] partes = celdas.text().split(" ");
        if (partes.length != 6) {
            continue;
        }

        String cambio = partes[5];
        if (formatoCambio.matcher(cambio.trim()).matches()) {
            tipoCambio = cambio;
            break;
        }
    }

    if (tipoCambio == null) {
        throw new CustomNotFoundException("No hay un tipo de cambio para el da: " + fecha);
    }

    return tipoCambio;
}

From source file:Leitura.Jxr.java

/**
 * Returns the text of all {@code pre} elements of {@code document} after stripping the
 * JXR line-number anchors and the JXR comment and Javadoc-comment spans. Keyword and
 * string spans are left in place.
 *
 * <p>NOTE(review): the original comment described this as a way to obtain the names of
 * the declared methods; the code itself only returns the cleaned source text.
 *
 * @return the source text without line numbers and comments
 * @throws IOException declared by the signature
 */
public String leituraJxr() throws IOException {
    Elements preBlocks = document.getElementsByTag("pre");

    // Drop the line-number anchors from every block in one pass.
    preBlocks.select("a.jxr_linenumber").remove();

    for (int i = 0; i < preBlocks.size(); i++) {
        Element pre = preBlocks.get(i);
        pre.getElementsByClass("jxr_comment").remove();
        pre.getElementsByClass("jxr_javadoccomment").remove();
    }

    // What remains is the code without the noise.
    return preBlocks.text();
}