Example usage for org.jsoup.nodes Element getElementsByTag

List of usage examples for org.jsoup.nodes Element getElementsByTag

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.getElementsByTag.

Prototype

public Elements getElementsByTag(String tagName) 

Source Link

Document

Finds elements, including and recursively under this element, with the specified tag name.

Usage

From source file:com.kantenkugel.discordbot.jdocparser.JDocParser.java

/**
 * Builds the list of documentation blocks found among the siblings that follow {@code elem}.
 *
 * <p>Each following sibling is inspected in document order: an {@code a} element updates the
 * hash link (from its {@code name} attribute) used for subsequent blocks, and a {@code ul}
 * element is turned into one {@link DocBlock} made of its {@code h4} title and the
 * {@code pre} / {@code div.block} / {@code dl} siblings of that title.
 *
 * @param jdocBase  base location passed to {@code JDocUtil.getLink}
 * @param elem      element whose following siblings are scanned; may be {@code null}
 * @param reference class documentation passed to {@code JDocUtil.getLink}
 * @return the collected blocks (possibly empty), or {@code null} when {@code elem} is {@code null}
 */
private static List<DocBlock> getDocBlock(String jdocBase, Element elem, ClassDocumentation reference) {
    if (elem == null) {
        // Kept as null (not an empty list) so existing callers see the same contract.
        return null;
    }
    String baseLink = JDocUtil.getLink(jdocBase, reference);
    List<DocBlock> blocks = new ArrayList<>(10);
    String hashLink = null;
    for (elem = elem.nextElementSibling(); elem != null; elem = elem.nextElementSibling()) {
        String tagName = elem.tagName();
        if (tagName.equals("a")) {
            hashLink = '#' + elem.attr("name");
        } else if (tagName.equals("ul")) {
            Element tmp = elem.getElementsByTag("h4").first();
            if (tmp == null) {
                // first() yields null when the list has no h4 title; without a title there
                // is nothing to build a block from, so skip instead of dereferencing null.
                continue;
            }
            String title = JDocUtil.fixSpaces(tmp.text().trim());
            String description = "";
            String signature = "";
            OrderedMap<String, List<String>> fields = new ListOrderedMap<>();
            // Walk the title element and then each of its following siblings.
            for (; tmp != null; tmp = tmp.nextElementSibling()) {
                String tmpTag = tmp.tagName();
                if (tmpTag.equals("pre")) {
                    // contains full signature
                    signature = JDocUtil.fixSpaces(tmp.text().trim());
                } else if (tmpTag.equals("div") && tmp.className().equals("block")) {
                    // main block of content (description or deprecation)
                    Element deprecationElem = tmp.getElementsByClass("deprecationComment").first();
                    if (deprecationElem != null) {
                        // deprecation block
                        fields.put("Deprecated:", Collections
                                .singletonList(JDocUtil.formatText(deprecationElem.html(), baseLink)));
                    } else {
                        // description block
                        description = JDocUtil.formatText(tmp.html(), baseLink);
                    }
                } else if (tmpTag.equals("dl")) {
                    // a definition list: each dt starts a new field, each dd adds a value to it
                    String fieldName = null;
                    List<String> fieldValues = new ArrayList<>();
                    for (Element element : tmp.children()) {
                        if (element.tagName().equals("dt")) {
                            if (fieldName != null) {
                                // flush the previous field before starting the next one
                                fields.put(fieldName, fieldValues);
                                fieldValues = new ArrayList<>();
                            }
                            fieldName = JDocUtil.fixSpaces(element.text().trim());
                        } else if (element.tagName().equals("dd")) {
                            fieldValues.add(JDocUtil.formatText(element.html(), baseLink));
                        }
                    }
                    if (fieldName != null) {
                        fields.put(fieldName, fieldValues);
                    }
                }
            }
            blocks.add(new DocBlock(title, hashLink, signature, description, fields));
        }
    }
    return blocks;
}

From source file:com.osw.enderecamento.util.EnderecamentoParseUtil.java

/**
 * extract info about a address/*from  www  .ja  va 2s . c  om*/
 * 
 * @param html
 * @return address list
 */
public List<Enderecamento> parseEnderecamento(String html) {

    Document docHtml = Jsoup.parse(html);
    Elements tagTr = docHtml.getElementsByTag("tr");
    List<Enderecamento> enderecamentos = new LinkedList<Enderecamento>();

    for (Element element : tagTr) {
        if (element.getElementsByTag("td").size() > 0) {
            Elements tahTd = element.getElementsByTag("td");
            Enderecamento enderecamento = new Enderecamento();
            enderecamento.setLogradouro(tahTd.get(0).text());
            enderecamento.setBairro(tahTd.get(1).text());
            enderecamento.setCidade(tahTd.get(2).text());
            enderecamento.setCep(tahTd.get(3).text());
            enderecamentos.add(enderecamento);
        }
    }
    return enderecamentos;

}

From source file:popo.defcon.MsgMeCDC.java

/**
 * Reads the notice page, and sends one SMS for every notice row whose time is newer than
 * {@code oldtime}.
 *
 * <p>Rows are taken from the second {@code tbody} of the page, minus its first row. Rows are
 * processed in page order and processing stops at the first row that is not newer than
 * {@code oldtime}. Afterwards {@code oldtime} is set to the time of the first processed row,
 * so notices already sent are not sent again on the next call.
 *
 * <p>NOTE(review): the page is presumably ordered newest-first; confirm against the site.
 */
void Parse() {
    String input = readPage();
    if (input == null) {
        System.out.println("Error connecting to Internet");
        return;
    }
    Logger logger = Logger.getLogger(MsgMeCDC.class.getName());
    Document cdc = Jsoup.parse(input);
    Elements notices = cdc.getElementsByTag("tbody");
    if (notices.size() < 2) {
        // The notice rows live in the second tbody; without it there is nothing to parse.
        logger.log(Level.WARNING, "Notice table not found in page");
        return;
    }
    Elements alerts = notices.get(1).getElementsByTag("tr");
    if (!alerts.isEmpty()) {
        // Drop the first row (the original code always discarded it before iterating).
        alerts.remove(0);
    }
    System.out.println("Current Old Time = " + oldtime);

    String randomtext = "Placement/ Internship Form Description Files";
    int twilio = "Sent from your Twilio trial account - ".length();
    String newestTime = null;
    for (Element node : alerts) {
        Elements content = node.getElementsByTag("td");
        if (content.size() < 5) {
            // Cells 1, 3 and 4 are read below; skip rows that do not have them.
            continue;
        }
        String time = content.last().text();
        if (newestTime == null) {
            newestTime = time;
        }
        if (convertTime(time).compareTo(convertTime(oldtime)) <= 0) {
            // Reached a notice that was already handled; everything after it is older.
            break;
        }
        System.out.println("Current notice time :" + convertTime(time));
        logger.log(Level.INFO, "Current notice time :" + convertTime(time));

        String smsTitle = content.get(1).text();
        String smsNoticeTime = content.get(4).text();
        String preSMStext = content.get(3).text();

        // Start right after the marker text when present, otherwise at the beginning;
        // never beyond the end of the text.
        int marker = preSMStext.indexOf(randomtext);
        int start = marker < 0 ? 0 : Math.min(marker + randomtext.length() + 1, preSMStext.length());
        // Remaining room out of the 150-character budget after title, time, prefix and
        // the two line breaks; clamped so substring never gets an invalid range.
        int budget = Math.max(0, 150 - (smsTitle.length() + smsNoticeTime.length() + twilio + 2));
        int stop = Math.min(preSMStext.length(), start + budget);
        String smsContent = preSMStext.substring(start, stop);

        String sms = smsTitle + '\n' + smsNoticeTime + '\n' + smsContent;
        System.out.println(sms);
        sendSMS(sms);
        logger.log(Level.INFO, "SMS sent: " + sms);
        logger.log(Level.INFO, "Length of SMS is " + (sms.length() + twilio));
        System.out.println("\nLength of SMS is " + (sms.length() + twilio));
        System.out.println("");
    }
    if (newestTime != null) {
        // Also reached when every row was new, so the same notices are not resent next time.
        MsgMeCDC.oldtime = newestTime;
    }
}

From source file:fr.arlefebvre.pronostics.controller.UEFATeamsController.java

/**
 * Returns the UEFA national teams scraped from the FIFA ranking page, sorted by name.
 *
 * <p>A non-empty {@code pseudoCache} is returned as-is. Otherwise the page is downloaded,
 * one {@code Team} is built per row of each {@code tbody} inside elements with class
 * {@code table}, and the result is stored in {@code pseudoCache}. When the download fails
 * the failure is logged and an empty list is returned.
 *
 * @return the cached or freshly scraped teams; never {@code null}
 */
@RequestMapping("/uefa/teams")
public List<Team> teams() {
    if (pseudoCache != null && !pseudoCache.isEmpty()) {
        return pseudoCache;
    }
    ArrayList<Team> result = new ArrayList<Team>();
    String uri = "http://fr.fifa.com/fifa-world-ranking/ranking-table/men/uefa.html";

    try {
        // Connect to the site and load the HTML document.
        Document doc = Jsoup.connect(uri).get();
        for (Element table : doc.getElementsByClass("table")) {
            Element tbody = table.getElementsByTag("tbody").first();
            if (tbody == null) {
                // first() yields null when the table has no tbody.
                continue;
            }
            for (Element row : tbody.children()) {
                Element teamNameElement = row.getElementsByClass("tbl-teamname").first();
                Element countryCodeElement = row.getElementsByClass("tbl-countrycode").first();
                if (teamNameElement == null || countryCodeElement == null) {
                    // Not a team row (or the page layout changed); skip it.
                    continue;
                }
                Element img = teamNameElement.select("img").first();
                Team team = new Team();
                team.setName(teamNameElement.text());
                team.setCountryCode(countryCodeElement.text());
                // absUrl itself returns "" when no absolute URL can be built, so "" is
                // used for a missing image as well.
                team.setImgUrl(img == null ? "" : img.absUrl("src"));
                team.setNationalTeam(true);
                result.add(team);
            }
        }
    } catch (IOException e) {
        java.util.logging.Logger.getLogger(getClass().getName())
                .log(java.util.logging.Level.WARNING, "Unable to load " + uri, e);
    }

    result.sort((t1, t2) -> t1.getName().compareTo(t2.getName()));
    // Cache every successful scrape. Previously only a null cache was replaced, so an empty
    // list stored after one failed download blocked caching for good.
    if (pseudoCache == null || !result.isEmpty()) {
        pseudoCache = result;
    }
    return result;
}

From source file:github.srlee309.lessWrongBookCreator.scraper.PostSectionExtractor.java

/**
 * Saves every image found in the given element and points each {@code img} tag at the
 * saved file by replacing its {@code src} with the bare file name.
 *
 * @param postContent element from which to extract the images
 */
protected final void convertImagesToLocal(Element postContent) {
    final String outputFolder = "htmlOutput";
    for (Element image : postContent.getElementsByTag("img")) {
        String absoluteSrc = image.absUrl("src");

        // The file name is whatever follows the last slash; the whole URL when there is none.
        int lastSlash = absoluteSrc.lastIndexOf('/');
        String fileName = (lastSlash == -1) ? absoluteSrc : absoluteSrc.substring(lastSlash + 1);

        image.attr("src", fileName);
        saveImage(absoluteSrc, outputFolder, fileName);
    }
}

From source file:com.example.muzei.muzeiapod.ApodNasaArtSource.java

/**
 * Downloads the APOD front page, extracts the image URI, title and byline from it,
 * publishes them as the current artwork and schedules the next update.
 *
 * <p>The image is taken from the first {@code img} inside the last {@code a} of the body's
 * first child; title and byline come from the body's second child.
 *
 * @param reason update reason supplied by the caller (not used here)
 */
@Override
protected void onTryUpdate(int reason) throws RetryException {
    final URI siteRoot;
    try {
        siteRoot = new URI("http://apod.nasa.gov/");
    } catch (URISyntaxException e) {
        return;
    }

    String pageHtml = getURLContent(siteRoot.resolve("/apod/astropix.html").toString());

    /* TODO code below should go to a separate method/class */

    /* parse the page; everything of interest hangs off <body> */
    Element body = Jsoup.parse(pageHtml).body();

    /* image: first <img> of the last <a> inside the first child of <body> */
    Element image = body.child(0).getElementsByTag("a").last().getElementsByTag("img").first();
    String imageUri = siteRoot.resolve("/apod/" + image.attr("src")).toString();

    /* title: text of the first child of the second child of <body> */
    Element titleSection = body.child(1);
    String title = titleSection.child(0).text();

    /* byline: whatever follows the last occurrence of the title in that section's text */
    String sectionText = titleSection.text();
    int bylineStart = sectionText.lastIndexOf(title) + title.length();
    String byline = sectionText.substring(bylineStart).trim();

    /* TODO figure out the permanent link */
    String link = "http://apod.nasa.gov/apod/astropix.html";

    Artwork.Builder builder = new Artwork.Builder();
    builder.title(title).byline(byline).imageUri(Uri.parse(imageUri)).token(title)
            .viewIntent(new Intent(Intent.ACTION_VIEW, Uri.parse(link)));
    publishArtwork(builder.build());
    scheduleUpdate(System.currentTimeMillis() + ROTATE_TIME_MILLIS);
}

From source file:HttpCilentExample.HttpCilentExample.java

/**
 * Collects the {@code input} fields of the form with id {@code gaia_loginform} as
 * name/value pairs, substituting the supplied credentials for the {@code Email} and
 * {@code Passwd} fields.
 *
 * @param html     markup of the page containing the login form
 * @param username value to send for the {@code Email} field
 * @param password value to send for the {@code Passwd} field
 * @return one pair per input element, in document order
 */
public List<NameValuePair> getFormParams(String html, String username, String password)
        throws UnsupportedEncodingException {

    System.out.println("Extracting form's data...");

    // Google form id
    Element form = Jsoup.parse(html).getElementById("gaia_loginform");
    Elements inputs = form.getElementsByTag("input");

    List<NameValuePair> params = new ArrayList<NameValuePair>();
    for (Element input : inputs) {
        String name = input.attr("name");

        final String value;
        if (name.equals("Email")) {
            value = username;
        } else if (name.equals("Passwd")) {
            value = password;
        } else {
            // every other field keeps the value already present in the page
            value = input.attr("value");
        }
        params.add(new BasicNameValuePair(name, value));
    }
    return params;
}

From source file:com.jimplush.goose.outputformatters.DefaultOutputFormatter.java

/**
 * Replaces every {@code a} element under {@code topNode} that contains no {@code img}
 * with a plain text node holding the link's text.
 */
private void convertLinksToText() {
    if (logger.isDebugEnabled()) {
        logger.debug("Turning links to text");
    }
    for (Element anchor : topNode.getElementsByTag("a")) {
        boolean wrapsImage = anchor.getElementsByTag("img").size() != 0;
        if (wrapsImage) {
            // links around images are left untouched
            continue;
        }
        anchor.replaceWith(new TextNode(anchor.text(), topNode.baseUri()));
    }
}

From source file:ru.neverdark.yotta.parser.YottaParser.java

/**
 * Reads one table and records its disks in {@code mEnclosuresDisk}.
 *
 * <p>The map key is the text of the first element whose {@code colspan} matches the given
 * array type (6, 9 or 10); for any other array type the key is {@code null}. Each element
 * with {@code bgcolor="FFFFDB"} becomes one {@code Disk} built from its first four
 * {@code td} cells: slot, usage, capacity, model.
 *
 * @param table     table element to read
 * @param arrayType model identifier selecting which colspan holds the array name
 */
private void parseTable(Element table, String arrayType) {
    // Which colspan value marks the cell holding the array name for this model.
    String nameColspan = null;
    if (arrayType.equals("YB-16S3EF8")) {
        nameColspan = "6";
    } else if (arrayType.equals("Y3-24S6DF8")) {
        nameColspan = "9";
    } else if (arrayType.equals("Y3-16S6SF8p")) {
        nameColspan = "10";
    }

    String arrayName = null;
    if (nameColspan != null) {
        arrayName = table.getElementsByAttributeValue("colspan", nameColspan).get(0).text();
    }

    List<Disk> diskList = new ArrayList<Disk>();
    for (Element row : table.getElementsByAttributeValue("bgcolor", "FFFFDB")) {
        Elements cells = row.getElementsByTag("td");
        String slotText = cells.get(0).text();
        String usageText = cells.get(1).text();
        String capacityText = cells.get(2).text();
        String modelText = cells.get(3).text();

        Disk entry = new Disk();
        entry.setSlot(slotText);
        entry.setUsage(usageText);
        entry.setCapacity(capacityText);
        entry.setModel(modelText);
        diskList.add(entry);
    }
    mEnclosuresDisk.put(arrayName, diskList);
}

From source file:com.dajodi.scandic.JSoupScraper.java

/**
 * Parses the stream as UTF-8 HTML and returns the name/value pairs of all {@code input}
 * elements inside the element with id {@code aspnetForm}.
 *
 * @param inStream stream holding the HTML page
 * @return map from input name to input value
 * @throws ScandicHtmlException wrapping any exception raised while parsing or scraping
 */
@Override
public Map<String, String> scrapeFormInputFields(InputStream inStream) {

    try {
        Document page = Jsoup.parse(inStream, HTTP.UTF_8, "");
        Elements inputs = page.body().getElementById("aspnetForm").getElementsByTag("input");

        Map<String, String> fields = new HashMap<String, String>();
        for (Element input : inputs) {
            String fieldName = input.attr("name");
            String fieldValue = input.attr("value");

            if (fieldName == null) {
                //TODO: remove me
                Log.d("Something weird");
                continue;
            }
            if (fieldValue == null) {
                fieldValue = "";
            }
            fields.put(fieldName, fieldValue);
        }

        // drop the parsed tree's children now that the values have been copied out
        page.empty();
        return fields;
    } catch (Exception e) {
        throw new ScandicHtmlException(e);
    }
}