List of usage examples for org.jsoup.nodes Element getElementsByTag
public Elements getElementsByTag(String tagName)
From source file:com.kantenkugel.discordbot.jdocparser.JDocParser.java
private static List<DocBlock> getDocBlock(String jdocBase, Element elem, ClassDocumentation reference) { if (elem != null) { String baseLink = JDocUtil.getLink(jdocBase, reference); List<DocBlock> blocks = new ArrayList<>(10); String hashLink = null;/*from w w w . ja v a2 s. c om*/ for (elem = elem.nextElementSibling(); elem != null; elem = elem.nextElementSibling()) { if (elem.tagName().equals("a")) { hashLink = '#' + elem.attr("name"); } else if (elem.tagName().equals("ul")) { Element tmp = elem.getElementsByTag("h4").first(); String title = JDocUtil.fixSpaces(tmp.text().trim()); String description = "", signature = ""; OrderedMap<String, List<String>> fields = new ListOrderedMap<>(); for (; tmp != null; tmp = tmp.nextElementSibling()) { if (tmp.tagName().equals("pre")) { //contains full signature signature = JDocUtil.fixSpaces(tmp.text().trim()); } else if (tmp.tagName().equals("div") && tmp.className().equals("block")) { //main block of content (description or deprecation) Element deprecationElem = tmp.getElementsByClass("deprecationComment").first(); if (deprecationElem != null) { //deprecation block fields.put("Deprecated:", Collections .singletonList(JDocUtil.formatText(deprecationElem.html(), baseLink))); } else { //description block description = JDocUtil.formatText(tmp.html(), baseLink); } } else if (tmp.tagName().equals("dl")) { //a field String fieldName = null; List<String> fieldValues = new ArrayList<>(); for (Element element : tmp.children()) { if (element.tagName().equals("dt")) { if (fieldName != null) { fields.put(fieldName, fieldValues); fieldValues = new ArrayList<>(); } fieldName = JDocUtil.fixSpaces(element.text().trim()); } else if (element.tagName().equals("dd")) { fieldValues.add(JDocUtil.formatText(element.html(), baseLink)); } } if (fieldName != null) { fields.put(fieldName, fieldValues); } } } blocks.add(new DocBlock(title, hashLink, signature, description, fields)); } } return blocks; } return null; }
From source file:com.osw.enderecamento.util.EnderecamentoParseUtil.java
/** * extract info about a address/*from www .ja va 2s . c om*/ * * @param html * @return address list */ public List<Enderecamento> parseEnderecamento(String html) { Document docHtml = Jsoup.parse(html); Elements tagTr = docHtml.getElementsByTag("tr"); List<Enderecamento> enderecamentos = new LinkedList<Enderecamento>(); for (Element element : tagTr) { if (element.getElementsByTag("td").size() > 0) { Elements tahTd = element.getElementsByTag("td"); Enderecamento enderecamento = new Enderecamento(); enderecamento.setLogradouro(tahTd.get(0).text()); enderecamento.setBairro(tahTd.get(1).text()); enderecamento.setCidade(tahTd.get(2).text()); enderecamento.setCep(tahTd.get(3).text()); enderecamentos.add(enderecamento); } } return enderecamentos; }
From source file:popo.defcon.MsgMeCDC.java
void Parse() { String input = readPage();/* ww w .j av a 2 s. c om*/ if (input == null) { System.out.println("Error connecting to Internet"); return; } String time; Document cdc = Jsoup.parse(input); Elements notices = cdc.getElementsByTag("tbody"); Elements alerts = notices.get(1).getElementsByTag("tr"); alerts.remove(0); System.out.println("Current Old Time = " + oldtime); for (Element node : alerts) { Elements content = node.getElementsByTag("td"); time = content.last().text(); if (convertTime(time).compareTo(convertTime(oldtime)) <= 0) { MsgMeCDC.oldtime = alerts.get(0).getElementsByTag("td").last().text(); return; } System.out.println("Current notice time :" + convertTime(time)); Logger.getLogger(MsgMeCDC.class.getName()).log(Level.INFO, "Current notice time :" + convertTime(time)); //for (Element text : content) { // System.out.println(text.text()); //} String smsTitle = content.get(1).text(); //String smsCompanyName = content.get(2).text(); String smsNoticeTime = content.get(4).text(); String preSMStext = content.get(3).text(); String randomtext = "Placement/ Internship Form Description Files"; int start = preSMStext.indexOf(randomtext) + randomtext.length() + 1; int twilio = "Sent from your Twilio trial account - ".length(); int end = 150 - (smsTitle.length() + smsNoticeTime.length() + twilio + 2); String smsContent = preSMStext.substring(start, start + end); String sms = smsTitle + '\n' + smsNoticeTime + '\n' + smsContent; System.out.println(sms); sendSMS(sms); Logger.getLogger(MsgMeCDC.class.getName()).log(Level.INFO, "SMS sent: " + sms); Logger.getLogger(MsgMeCDC.class.getName()).log(Level.INFO, "Length of SMS is " + (sms.length() + twilio)); System.out.println("\nLength of SMS is " + (sms.length() + twilio)); System.out.println(""); } //System.out.println(notices.toString()); }
From source file:fr.arlefebvre.pronostics.controller.UEFATeamsController.java
@RequestMapping("/uefa/teams") public List<Team> teams() { if (pseudoCache != null && !pseudoCache.isEmpty()) return pseudoCache; ArrayList<Team> result = new ArrayList<Team>(); String uri = "http://fr.fifa.com/fifa-world-ranking/ranking-table/men/uefa.html"; //On se connecte au site et on charge le document html Document doc;// w w w .ja v a2s . c o m try { doc = Jsoup.connect(uri).get(); Elements elements = doc.getElementsByClass("table"); for (Element element : elements) { Element tbody = element.getElementsByTag("tbody").first(); for (Element child : tbody.children()) { Element teamNameElement = child.getElementsByClass("tbl-teamname").first(); String name = teamNameElement.text(); String countryCode = child.getElementsByClass("tbl-countrycode").first().text(); String imgUrl = teamNameElement.select("img").first().absUrl("src"); Team team = new Team(); team.setName(name); team.setCountryCode(countryCode); team.setImgUrl(imgUrl); team.setNationalTeam(true); result.add(team); } } //String titre = element.text(); } catch (IOException e) { e.printStackTrace(); } // RestTemplate restTemplate = new RestTemplate(); // ResponseEntity<ChampionListDto> response = restTemplate.getForEntity( // uri, // ChampionListDto.class); // // List<ChampionDto> champions = response.getBody().getChampions(); // return champions.stream().map(c -> getChampionById(c.getId()).getName()).collect(Collectors.toList()); result.sort((t1, t2) -> t1.getName().compareTo(t2.getName())); if (pseudoCache == null) pseudoCache = result; return result; }
From source file:github.srlee309.lessWrongBookCreator.scraper.PostSectionExtractor.java
/** * Saves all images in the given Element to a local newUrl and converts the src for all img tags to the local file * @param postContent - from which to extract the images *///from w w w .j a v a 2 s.c om protected final void convertImagesToLocal(Element postContent) { Elements imgs = postContent.getElementsByTag("img"); for (Element img : imgs) { String src = img.absUrl("src"); String folder = "htmlOutput"; int indexName = src.lastIndexOf("/"); String name = src; if (indexName != -1) { indexName = src.lastIndexOf("/") + 1; name = src.substring(indexName, src.length()); } img.attr("src", name); saveImage(src, folder, name); } }
From source file:com.example.muzei.muzeiapod.ApodNasaArtSource.java
/**
 * Fetches the APOD front page, extracts image, title and byline from it, publishes the
 * artwork and schedules the next update.
 *
 * <p>Returns without doing anything if the site root cannot be parsed as a URI.
 *
 * @param reason update reason supplied by the caller; not read by this method
 * @throws RetryException declared by the overridden method
 */
@Override
protected void onTryUpdate(int reason) throws RetryException {
    URI siteRoot;
    try {
        siteRoot = new URI("http://apod.nasa.gov/");
    } catch (URISyntaxException e) {
        return;
    }
    String pageHtml = getURLContent(siteRoot.resolve("/apod/astropix.html").toString());

    /* TODO code below should go to a separate method/class */
    /* start parsing page */
    Element pageBody = Jsoup.parse(pageHtml).body();

    /* image URI: the <img> inside the last <a> of the body's first child */
    Element lastAnchor = pageBody.child(0).getElementsByTag("a").last();
    Element picture = lastAnchor.getElementsByTag("img").first();
    String imageUri = siteRoot.resolve("/apod/" + picture.attr("src")).toString();

    /* title: first child of the body's second child */
    Element captionBlock = pageBody.child(1);
    String title = captionBlock.child(0).text();

    /* byline: everything after the last occurrence of the title in the caption text */
    String captionText = captionBlock.text();
    int bylineStart = captionText.lastIndexOf(title) + title.length();
    String byline = captionText.substring(bylineStart).trim();

    /* TODO figure out the permanent link */
    String link = "http://apod.nasa.gov/apod/astropix.html";

    Artwork.Builder builder = new Artwork.Builder();
    builder = builder.title(title).byline(byline).imageUri(Uri.parse(imageUri)).token(title);
    builder = builder.viewIntent(new Intent(Intent.ACTION_VIEW, Uri.parse(link)));
    publishArtwork(builder.build());
    scheduleUpdate(System.currentTimeMillis() + ROTATE_TIME_MILLIS);
}
From source file:HttpCilentExample.HttpCilentExample.java
public List<NameValuePair> getFormParams(String html, String username, String password) throws UnsupportedEncodingException { System.out.println("Extracting form's data..."); Document doc = Jsoup.parse(html); // Google form id Element loginform = doc.getElementById("gaia_loginform"); Elements inputElements = loginform.getElementsByTag("input"); List<NameValuePair> paramList = new ArrayList<NameValuePair>(); for (Element inputElement : inputElements) { String key = inputElement.attr("name"); String value = inputElement.attr("value"); if (key.equals("Email")) value = username;/*from w ww . j a v a 2 s . com*/ else if (key.equals("Passwd")) value = password; paramList.add(new BasicNameValuePair(key, value)); } return paramList; }
From source file:com.jimplush.goose.outputformatters.DefaultOutputFormatter.java
/**
 * Replaces every {@code <a>} under {@code topNode} that contains no {@code <img>} with a
 * plain text node holding the link's text.
 */
private void convertLinksToText() {
    if (logger.isDebugEnabled()) {
        logger.debug("Turning links to text");
    }
    for (Element anchor : topNode.getElementsByTag("a")) {
        boolean wrapsImage = !anchor.getElementsByTag("img").isEmpty();
        if (wrapsImage) {
            // links around images are left untouched
            continue;
        }
        anchor.replaceWith(new TextNode(anchor.text(), topNode.baseUri()));
    }
}
From source file:ru.neverdark.yotta.parser.YottaParser.java
/**
 * Reads the disks listed in {@code table} and stores them in {@code mEnclosuresDisk}
 * under the array name.
 *
 * <p>The array name is the text of the first element whose {@code colspan} attribute matches
 * the value tied to {@code arrayType} (6, 9 or 10); for any other type the name stays
 * {@code null}. Each element with {@code bgcolor="FFFFDB"} supplies one disk from its first
 * four {@code <td>} cells: slot, usage, capacity, model.
 *
 * @param table     table element to read
 * @param arrayType model identifier that selects the colspan of the name cell
 */
private void parseTable(Element table, String arrayType) {
    // Pick the colspan that marks the array-name cell for this model.
    String nameColspan = null;
    if (arrayType.equals("YB-16S3EF8")) {
        nameColspan = "6";
    } else if (arrayType.equals("Y3-24S6DF8")) {
        nameColspan = "9";
    } else if (arrayType.equals("Y3-16S6SF8p")) {
        nameColspan = "10";
    }

    String arrayName = null;
    if (nameColspan != null) {
        arrayName = table.getElementsByAttributeValue("colspan", nameColspan).get(0).text();
    }

    List<Disk> diskList = new ArrayList<Disk>();
    for (Element row : table.getElementsByAttributeValue("bgcolor", "FFFFDB")) {
        Elements cells = row.getElementsByTag("td");
        String slotText = cells.get(0).text();
        String usageText = cells.get(1).text();
        String capacityText = cells.get(2).text();
        String modelText = cells.get(3).text();

        Disk entry = new Disk();
        entry.setSlot(slotText);
        entry.setUsage(usageText);
        entry.setCapacity(capacityText);
        entry.setModel(modelText);
        diskList.add(entry);
    }
    mEnclosuresDisk.put(arrayName, diskList);
}
From source file:com.dajodi.scandic.JSoupScraper.java
@Override public Map<String, String> scrapeFormInputFields(InputStream inStream) { try {/*from w ww . ja v a 2 s. c o m*/ Document doc = Jsoup.parse(inStream, HTTP.UTF_8, ""); Element form = doc.body().getElementById("aspnetForm"); Elements inputNodes = form.getElementsByTag("input"); Map<String, String> inputMap = new HashMap<String, String>(); for (Element element : inputNodes) { String name = element.attr("name"); String value = element.attr("value"); if (name != null) { inputMap.put(name, value == null ? "" : value); } else { //TODO: remove me Log.d("Something weird"); } } doc.empty(); return inputMap; } catch (Exception e) { throw new ScandicHtmlException(e); } }