List of usage examples for org.jsoup.nodes Document select
public Elements select(String cssQuery)
From source file:me.vertretungsplan.parser.DSBLightParser.java
private void parseProgram(String url, String html, SubstitutionSchedule schedule, Map<String, String> referer, String firstUrl) throws IOException, JSONException, CredentialInvalidException { Document doc = Jsoup.parse(html, url); if (doc.select("iframe").attr("src").equals(firstUrl) || doc.select("iframe").size() == 0) { return;//from w w w. j av a2s . c o m } for (Element iframe : doc.select("iframe")) { // Data parseDay(iframe.attr("src"), referer, schedule, iframe.attr("src")); } if (firstUrl == null) { firstUrl = doc.select("iframe").attr("src"); } if (doc.select("#hlNext").size() > 0) { String nextUrl = doc.select("#hlNext").first().attr("abs:href"); try { String response = httpGet(nextUrl, ENCODING, referer); parseProgram(response, nextUrl, schedule, referer, firstUrl); } catch (HttpResponseException ignored) { } } if (html.contains("Timer1")) { List<Connection.KeyVal> formData = ((FormElement) doc.select("form").first()).formData(); List<NameValuePair> formParams = new ArrayList<>(); for (Connection.KeyVal kv : formData) { formParams.add(new BasicNameValuePair(kv.key(), kv.value())); } formParams.add(new BasicNameValuePair("__EVENTTARGET", "Timer1")); formParams.add(new BasicNameValuePair("__EVENTARGUMENT", "")); String response = httpPost(url, ENCODING, formParams, referer); parseProgram(url, response, schedule, referer, firstUrl); } }
From source file:edu.rowan.app.carousel.CarouselFetch.java
@Override protected CarouselFeature[] doInBackground(Void... params) { String rowanURL = "http://rowan.edu"; ArrayList<CarouselFeature> cfeatures = new ArrayList<CarouselFeature>(); long lastUpdated = prefs.getLong(LAST_UPDATE, -1); if (lastUpdated > 0) { long timeDiff = Calendar.getInstance().getTimeInMillis() - lastUpdated; int hours = (int) (timeDiff / (60 * 60 * 1000)); if (hours < UPDATE_INTERVAL) { // just load saved features cfeatures.addAll(loadFeaturesFromPreferences()); // System.out.println("Loaded features from prefernces"); return cfeatures.toArray(new CarouselFeature[cfeatures.size()]); }// ww w .ja va 2 s . com } // ELSE: Attempt to update // but check if we have available connection try { // Download + Parse Rowan's homepage for features //Toast.makeText(context, "Updating CarouselView", Toast.LENGTH_SHORT).show(); DOUH CAN"T DO THIS Document document = Jsoup.connect(rowanURL).get(); Elements features = document.select(".feature "); for (Element feature : features) { String title = feature.select(".title a span").first().text(); String description = feature.select(".description a").first().text(); Element link = feature.select("a").first(); String linkURL = link.attr("abs:href"); String imageURL = link.select("img").first().attr("abs:src"); CarouselFeature cFeature = new CarouselFeature(title, description, linkURL, imageURL, RECEIVER, context); cfeatures.add(cFeature); } saveDataToPreferences(cfeatures); } catch (IOException e1) { e1.printStackTrace(); return null; } return cfeatures.toArray(new CarouselFeature[cfeatures.size()]); }
From source file:lolth.autohome.buy.AutohomeBuyInfoListTaskFetch.java
@Override protected void parsePage(Document doc, FetchTask task) throws Exception { Elements lis = doc.select("li.price-item"); for (Element li : lis) { AutohomeBuyInfoBean bean = new AutohomeBuyInfoBean(); bean.setUrl(task.getUrl());/*from w ww .ja v a 2 s . co m*/ bean.setForumId(task.getExtra()); // post id Elements id = li.select("div.price-share a.share"); if (!id.isEmpty()) { String idStr = id.first().attr("data-target"); idStr = StringUtils.substringAfterLast(idStr, "_"); if (StringUtils.isBlank(idStr)) { continue; } bean.setId(idStr); } // Elements user = li.select("div.user-name a"); if (!user.isEmpty()) { String userUrl = user.first().absUrl("href"); String userId = StringUtils.substringAfterLast(userUrl, "/"); String userName = user.first().text(); bean.setUserId(userId); bean.setUserUrl(userUrl); bean.setUserName(userName); } // ? Elements postTime = li.select("div.user-name span"); if (!postTime.isEmpty()) { bean.setPostTime(StringUtils.trim(StringUtils.substringBefore(postTime.first().text(), "?"))); } Elements dataLis = li.select("div.price-item-bd li"); for (Element dataLi : dataLis) { String data = dataLi.text(); if (StringUtils.startsWith(data, "")) { bean.setCar(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "")) { bean.setPrice(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "")) { bean.setGuidePrice(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "?")) { bean.setTotalPrice(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "")) { bean.setPurchaseTax(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "?")) { bean.setCommercialInsurance(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "")) { bean.setVehicleUseTax(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if 
(StringUtils.startsWith(data, "")) { bean.setCompulsoryInsurance(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "")) { bean.setLicenseFee(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "?")) { bean.setPromotion(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "")) { bean.setBuyTime(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "")) { String area = StringUtils.trim(StringUtils.substringAfter(data, "")); String[] pAndC = StringUtils.splitByWholeSeparator(area, ",", 2); if (pAndC.length == 1) { bean.setBuyProvince(pAndC[0]); bean.setBuyCity(pAndC[0]); } if (pAndC.length == 2) { bean.setBuyProvince(pAndC[0]); bean.setBuyCity(pAndC[1]); } } if (StringUtils.startsWith(data, "")) { Elements level = dataLi.select("span.level"); // if (!level.isEmpty()) { bean.setSellerComment(level.first().text()); } // ? Elements seller = dataLi.select("a.title"); if (!seller.isEmpty()) { String sellerUrl = seller.first().absUrl("href"); String sellerName = seller.first().text(); String sellerId = StringUtils.substringAfterLast(sellerUrl, "/"); bean.setSellerId(sellerId); bean.setSellerName(sellerName); bean.setSellerUrl(sellerUrl); } // ? Elements sellerPhone = dataLi.select("em.phone-num"); if (!sellerPhone.isEmpty()) { bean.setSellerPhone(sellerPhone.first().text()); } // ? // Elements sellerAddress = dataLi.select("em.phone-num"); } if (StringUtils.startsWith(data, "?")) { bean.setBuyFeeling(StringUtils.trim(StringUtils.substringAfter(data, ""))); } } log.debug("Bean : {}", bean); bean.persistOnNotExist(); } }
From source file:me.vertretungsplan.parser.TurboVertretungParser.java
private void parseTurboVertretungDay(SubstitutionSchedule v, Document doc) { SubstitutionScheduleDay day = new SubstitutionScheduleDay(); String date = doc.select(".Titel").text().replaceFirst("Vertretungsplan( fr)? ", ""); day.setDate(DateTimeFormat.forPattern("EEEE, d. MMMM yyyy").withLocale(Locale.GERMAN).parseLocalDate(date)); String lastChange = doc.select(".Stand").text().replace("Stand: ", ""); day.setLastChange(DateTimeFormat.forPattern("dd.MM.yyyy HH:mm:ss").withLocale(Locale.GERMAN) .parseLocalDateTime(lastChange)); if (doc.text().contains("Kein Vertretungsplan")) { v.addDay(day);//from ww w .j a va2s . co m return; } if (doc.select(".LehrerFrueher").size() > 0) { day.addMessage(doc.select(".LehrerFrueherLabel").text() + "\n" + doc.select(".LehrerFrueher").text()); } if (doc.select(".LehrerVerplant").size() > 0) { day.addMessage(doc.select(".LehrerVerplantLabel").text() + "\n" + doc.select(".LehrerVerplant").text()); } if (doc.select(".Abwesenheiten-Klassen").size() > 0) { day.addMessage(doc.select(".Abwesenheiten-KlassenLabel").text() + "\n" + doc.select(".Abwesenheiten-Klassen").text()); } Element table = doc.select("table").first(); for (Element row : table.select("tr:has(td)")) { Substitution substitution = new Substitution(); substitution.setLesson(row.select(query("Stunde")).text()); substitution.setPreviousTeacher(row.select(query("Lehrer")).text()); substitution.setTeacher(row.select(query("Vertretung")).text()); substitution.setClasses(new HashSet<>(Arrays.asList(row.select(query("Klasse")).text().split(" ")))); substitution.setSubject(row.select(query("Fach")).text()); substitution.setDesc(row.select(query("Anmerkung")).text()); substitution.setRoom(row.select(query("Raum")).text()); String type = recognizeType(row.select(query("Anmerkung")).text()); if (type == null) type = "Vertretung"; substitution.setType(type); substitution.setColor(colorProvider.getColor(type)); day.addSubstitution(substitution); } v.addDay(day); }
From source file:com.kantenkugel.discordbot.jdocparser.JDocParser.java
static void parse(final String jdocBase, final String name, final InputStream inputStream, Map<String, ClassDocumentation> docs) { final String[] pathSplits = name.split("/"); final String fileName = pathSplits[pathSplits.length - 1]; if (!Character.isUpperCase(fileName.charAt(0))) { //ignore jdoc structure html return;/*from w w w. j a v a2 s . c om*/ } final String[] nameSplits = fileName.split("\\."); final String className = nameSplits[nameSplits.length - 2]; final String fullName = fileName.substring(0, fileName.length() - nameSplits[nameSplits.length - 1].length() - 1); try (BufferedReader buffer = new BufferedReader(new InputStreamReader(inputStream))) { //create dom Document final String content = buffer.lines().collect(Collectors.joining("\n")); Document document = Jsoup.parse(content); //classDocument (classname, package, description) Element titleElem = getSingleElementByClass(document, "title"); final String classSig = JDocUtil.fixSpaces(titleElem.text()); Element packageElem = titleElem.previousElementSibling(); if (packageElem.children().size() > 1) { packageElem = packageElem.children().last(); } final String pack = JDocUtil.fixSpaces(packageElem.text()); final String link = JDocUtil.getLink(jdocBase, pack, fullName); Element descriptionElement = null; Elements descriptionCandidates = document.select(".description .block"); if (descriptionCandidates.size() > 1) { List<Element> removed = descriptionCandidates.stream().map(elem -> elem.child(0)) .filter(child -> child != null && !child.className().startsWith("deprecat")) .map(Element::parent).collect(Collectors.toList()); if (removed.size() != 1) throw new RuntimeException("Found too many description candidates"); descriptionElement = removed.get(0); } else if (descriptionCandidates.size() == 1) { descriptionElement = descriptionCandidates.get(0); } final String description = descriptionElement == null ? 
"" : JDocUtil.formatText(descriptionElement.html(), link); final ClassDocumentation classDoc = new ClassDocumentation(pack, fullName, classSig, description, classSig.startsWith("Enum")); //methods, fields final Element details = document.getElementsByClass("details").first(); if (details != null) { //methods Element tmp = getSingleElementByQuery(details, "a[name=\"method.detail\"]"); List<DocBlock> docBlock = getDocBlock(jdocBase, tmp, classDoc); if (docBlock != null) { for (DocBlock block : docBlock) { Set<MethodDocumentation> mdocs = classDoc.methodDocs .computeIfAbsent(block.title.toLowerCase(), key -> new HashSet<>()); mdocs.add(new MethodDocumentation(classDoc, block.signature, block.hashLink, block.description, block.fields)); } } //vars tmp = getSingleElementByQuery(details, "a[name=\"field.detail\"]"); docBlock = getDocBlock(jdocBase, tmp, classDoc); if (docBlock != null) { for (DocBlock block : docBlock) { classDoc.classValues.put(block.title.toLowerCase(), new ValueDocumentation(classDoc, block.title, block.hashLink, block.signature, block.description)); } } //enum-values tmp = getSingleElementByQuery(details, "a[name=\"enum.constant.detail\"]"); docBlock = getDocBlock(jdocBase, tmp, classDoc); if (docBlock != null) { for (DocBlock block : docBlock) { classDoc.classValues.put(block.title.toLowerCase(), new ValueDocumentation(classDoc, block.title, block.hashLink, block.signature, block.description)); } } } final Element methodSummary = getSingleElementByQuery(document, "a[name=\"method.summary\"]"); classDoc.inheritedMethods.putAll(getInheritedMethods(methodSummary)); //storing if (nameSplits.length > 2) { if (!docs.containsKey(nameSplits[0].toLowerCase())) docs.put(nameSplits[0].toLowerCase(), new ClassDocumentation(null, null, null, null, false)); ClassDocumentation parent = docs.get(nameSplits[0].toLowerCase()); for (int i = 1; i < nameSplits.length - 2; i++) { if (!parent.subClasses.containsKey(nameSplits[i].toLowerCase())) 
parent.subClasses.put(nameSplits[i].toLowerCase(), new ClassDocumentation(null, null, null, null, false)); parent = parent.subClasses.get(nameSplits[i].toLowerCase()); } if (parent.subClasses.containsKey(className.toLowerCase())) classDoc.subClasses.putAll(parent.subClasses.get(className.toLowerCase()).subClasses); parent.subClasses.put(className.toLowerCase(), classDoc); } if (docs.containsKey(fullName.toLowerCase())) { ClassDocumentation current = docs.get(fullName.toLowerCase()); if (current.classSig != null) throw new RuntimeException("Got a class-name conflict with classes " + classDoc.classSig + "(" + classDoc.className + ") AND " + current.classSig + "(" + current.className + ")"); classDoc.subClasses.putAll(current.subClasses); } docs.put(fullName.toLowerCase(), classDoc); } catch (final IOException | NullPointerException ex) { JDocUtil.LOG.error("Got excaption for element {}", fullName, ex); } try { inputStream.close(); } catch (final IOException e) { JDocUtil.LOG.error("Error closing inputstream", e); } }
From source file:com.vaushell.superpipes.tools.http.ImageExtractor.java
/** * Return the biggest image URI of this webpage. * * @param rootURI Webpage URI/* w w w .j a v a 2 s.c om*/ * @return Biggest image * @throws IOException */ public BufferedImage extractBiggest(final URI rootURI) throws IOException { final List<URI> imagesURIs = new ArrayList<>(); HttpEntity responseEntity = null; try { // Exec request final HttpGet get = new HttpGet(rootURI); try (final CloseableHttpResponse response = client.execute(get)) { final StatusLine sl = response.getStatusLine(); if (sl.getStatusCode() != 200) { throw new IOException(sl.getReasonPhrase()); } responseEntity = response.getEntity(); try (final InputStream is = responseEntity.getContent()) { final Document doc = Jsoup.parse(is, "UTF-8", rootURI.toString()); final Elements elts = doc.select("img"); if (elts != null) { for (final Element elt : elts) { final String src = elt.attr("src"); if (src != null && !src.isEmpty()) { try { imagesURIs.add(rootURI.resolve(src)); } catch (final IllegalArgumentException ex) { // Ignore wrong encoded URI } } } } } } } finally { if (responseEntity != null) { EntityUtils.consume(responseEntity); } } final BufferedImage[] images = new BufferedImage[imagesURIs.size()]; final ExecutorService service = Executors.newCachedThreadPool(); for (int i = 0; i < imagesURIs.size(); ++i) { final int num = i; service.execute(new Runnable() { @Override public void run() { try { images[num] = HTTPhelper.loadPicture(client, imagesURIs.get(num)); } catch (final IOException ex) { images[num] = null; } } }); } service.shutdown(); try { service.awaitTermination(1L, TimeUnit.DAYS); } catch (final InterruptedException ex) { // Ignore } BufferedImage biggest = null; int biggestSize = Integer.MIN_VALUE; for (int i = 0; i < imagesURIs.size(); ++i) { if (images[i] != null) { final int actualSize = images[i].getWidth() * images[i].getHeight(); if (actualSize > biggestSize) { biggest = images[i]; biggestSize = actualSize; } } } return biggest; }
From source file:neembuu.release1.externalImpl.linkhandler.VimeoLinkHandlerProvider.java
/** * Find data url./* w w w . ja v a2 s. c om*/ * @param vimeoUrl The vimeo url. */ private void findData(String vimeoUrl) { try { //Find data config url final String response = NHttpClientUtils.getData(vimeoUrl, httpClient); Document doc = Jsoup.parse(response); //Find title title = doc.select("meta[property=og:title]").attr("content"); //Find data-config-url dataConfigUrl = doc.select("div.player").attr("data-config-url"); dataConfigUrl = dataConfigUrl.replaceAll("&", "&"); System.out.println("Dataconfigurl: " + dataConfigUrl); } catch (Exception ex) { ex.printStackTrace(); } }
From source file:org.brunocvcunha.taskerbox.impl.jobs.MonsterJobSeeker.java
@Override protected void execute() throws Exception { try {/*from w w w . j av a 2s . co m*/ for (int x = 1; x < this.maxPages; x++) { int uniqueCount = 0; // DefaultHttpClient client = // TaskerboxHttpBox.getInstance().buildNewHttpClient(); String seekUrl = "http://jobsearch.monster." + this.site + "/search/?q=" + URLEncoder.encode(this.search) + "&sort=dt.rv.di&pg=" + x; logInfo(log, "... Seeking " + seekUrl); HttpEntity entity = TaskerboxHttpBox.getInstance().getEntityForURL(seekUrl); String result = TaskerboxHttpBox.getInstance().readResponseFromEntity(entity); if (result.contains("Sorry, no jobs were found that match your criteria")) { System.err.println("Busca encerrada."); this.bootstrapHttpClient(true); break; // return; } try { Document doc = Jsoup.parse(result); Elements el = doc.select("table.listingsTable").select("tr"); for (val item : el) { Elements jobTitleEl = item.select("div.jobTitleContainer"); Elements companyEl = item.select("div.companyContainer"); Elements locationEl = item.select("div.jobLocationSingleLine"); // aaa String url = jobTitleEl.select("a").attr("href"); if (url.equals("")) { continue; } if (url.contains("?mescoid")) { url = url.substring(0, url.indexOf("?mescoid")); } if (url.contains("?jobPosition")) { url = url.substring(0, url.indexOf("?jobPosition")); } if (url.contains("&jobPosition")) { url = url.substring(0, url.indexOf("&jobPosition")); } String company = ""; if (!companyEl.select("a").isEmpty()) { company = companyEl.select("a").get(0).attr("title"); } handleJob(jobTitleEl.text(), company, locationEl.select("a").text(), url); uniqueCount++; } if (uniqueCount == 0) { logInfo(log, "MONSTER BREAK -- NO UNIQUE COUNT"); break; } try { Thread.sleep(10000L); } catch (InterruptedException e) { e.printStackTrace(); } } catch (Exception e) { e.printStackTrace(); } } } catch (Exception e) { e.printStackTrace(); } }
From source file:org.hmzb.test.HttpClientTest.java
@Test public final void testPMS() throws IOException { // Map<String, String> data = new HashMap<String, String>(); // data.put("act", "module"); // data.put("name", "sns"); // data.put("do", "post"); // data.put("id", "137"); // data.put("replyid", ""); // data.put("postid", "150"); // data.put("reply_content", "?, ?"); String url = "http://pms.local.17173.com/task_list_department.php?action=search&employment_id=&state=0&time_id=plan&start_date=2014-01-01&end_date=2014-05-16&x=24&y=5"; String cookieValue = "SUV=1381469482625841; NUV=1381507200000; sohutag=8HsmeSc5NCwmcyc5NCwmYjc5NCwmYSc5NCwmZjc5NCwmZyc5Njwmbjc5NCwmaSc5NCwmdyc5NCwmaCc5NCwmYyc5NCwmZSc5NCwmbSc5NH0; __utma=113262040.1666690635.1382600575.1382600575.1382600575.1; vjuids=c639cb6b1.142370b45c7.0.ef4cedbb; Hm_lvt_0245ebe4fb30a09e371e4f011dec1f6a=1388137801; live_17173_unique=e7de7aed49953586fc1da607967cf847; _ga=GA1.2.1666690635.1382600575; pgv_pvi=2611450780; vjlast=1383902955.1399958818.22; ermpdockData=1,2,4,13,17; DIFF=1400117702510; IPLOC=CN3501; ErmpToken=Q1k1MzIw; ErmpTicket=MTAuNS4xNS4xNg; ppinf=2|1401269453|1402479053|bG9naW5pZDowOnx1c2VyaWQ6MTY6cHR6aHVmQDE3MTczLmNvbXxzZXJ2aWNldXNlOjMwOjAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMHxjcnQ6MTA6MjAxMi0xMS0yMHxlbXQ6MTowfGFwcGlkOjQ6MTA3N3x0cnVzdDoxOjF8cGFydG5lcmlkOjE6MHxyZWxhdGlvbjowOnx1dWlkOjE2OmRiYmNhNTA3ZjNmMjRjMnR8dWlkOjk6czg3MDM4OTcwfHVuaXFuYW1lOjQ0OiVFNiU5MCU5QyVFNyU4QiU5MCVFNyVCRCU5MSVFNSU4RiU4QjMxNDI4NjcxfA; pprdig=Hs7tIw6klJdNasYa5mYo4aOzZnr2dL96PkIAMo8K4KGp4UM2yhx2LHuNOZ5zX7s4pKShi4GnXYFIIyAW-BWRJCAgmI2qeorvqshYjT5gs4gWKGgJNtoQAbdIt1liIK-Bt1aX_mYueEHUA_yRDVhRxRVLVt3mtlgywukd-stCIOE; lastdomain=1402479053|cHR6aHVmQDE3MTczLmNvbXw|17173.com; PHPSESSID=qcr7raandp6l0k7g9vpg0lgn22; PMS_cypms_username=fuzhu; PMS_cypms_auth=c0b47dad95a0e7ef7505d9ce057b6651"; Document resultDoc = Jsoup.connect(url).header("cookie", cookieValue).timeout(20000).get(); Elements table = resultDoc.select("table.list"); Elements trs = table.select("tr"); // //from ww w .j a va2 
s . co m trs.remove(0); // ?? trs.remove(trs.size() - 1); // Double totalTime = 0d; String regex = ".*?.*"; for (Element element : trs) { Elements tds = element.select("td"); // System.out.println(tds); String projectName = tds.get(3).text(); Double realTime = Double.valueOf(tds.get(7).text()); if (projectName.matches(regex)) { totalTime += realTime; } } System.out.println(totalTime); }
From source file:br.gov.jfrj.siga.base.SigaHTTP.java
/**
 * Parses the given markup and reads the {@code action} attribute from its
 * {@code form} elements.
 *
 * @param htmlContent HTML markup to inspect
 * @return the attribute value as reported by jsoup's {@code Elements.attr("action")}
 *         for the selected forms
 */
private String getAttributeActionFromHtml(String htmlContent) {
    return Jsoup.parse(htmlContent).select("form").attr("action");
}