List of usage examples for org.jsoup.nodes Document select
public Elements select(String cssQuery)
From source file:sample.ui.mvc.MessageController.java
private String getBidId(Message message) { try {//from w w w .j a v a2 s. c o m BasicCookieStore cookieStore = new BasicCookieStore(); CloseableHttpClient httpclient = HttpClients.custom().setDefaultCookieStore(cookieStore).build(); doLogin(cookieStore, httpclient, ZHANGDAIYIXIAN); // String bidName = message.getBidName(); // time // String mainUrl = "http://www.wujinsuo.cn:80/index.php"; HttpGet httpget = new HttpGet(mainUrl); httpget.addHeader("Accept", ACCEPT); httpget.addHeader("User-Agent", AGENT); ResponseHandler<String> responseHandler = new ResponseHandler<String>() { public String handleResponse(final HttpResponse response) throws ClientProtocolException, IOException { int status = response.getStatusLine().getStatusCode(); if (status >= 200 && status < 300) { HttpEntity entity = response.getEntity(); return entity != null ? EntityUtils.toString(entity) : null; } else { throw new ClientProtocolException("Unexpected response status: " + status); } } }; String resultString = httpclient.execute(httpget, responseHandler); // parse html Document doc = Jsoup.parse(resultString); Elements links = doc.select("a[href]"); Element aElement = null; for (Element e : links) { List<Node> childNode = e.childNodes(); if (childNode.size() != 1) continue; Node node = childNode.get(0); if ("span".equals(node.nodeName())) { String html = node.outerHtml(); logger.info(html); if (html.contains(bidName)) { // okle aElement = e; } } } if (aElement == null) { // retry return ""; } else { String href = aElement.attr("href"); String bidId = StringUtils.substringAfter(href, "id="); logger.info(bidId); return bidId; } } catch (ClientProtocolException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (URISyntaxException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return null; }
From source file:com.github.binlee1990.transformers.spider.PersonCrawler.java
@Override public void visit(Page page) { int docid = page.getWebURL().getDocid(); String url = page.getWebURL().getURL(); logger.info(url);//from w ww . j a va2s . c o m if (!url.startsWith("http://www.javlibrary.com/cn/?v=jav")) { return; } if (page.getParseData() instanceof HtmlParseData) { HtmlParseData htmlParseData = (HtmlParseData) page.getParseData(); String html = htmlParseData.getHtml(); Document doc = Jsoup.parse(html); String videoIdentificationCode = doc.select("div#video_id td.text").first().text().toString(); Video queryVideo = new Video(); queryVideo.setIdentificationCode(videoIdentificationCode); Video video = videoMapper.queryByVideo(queryVideo); if (null != video) { return; } video = new Video(); video.setUrl(url); Date now = new Date(); video.setCreateTime(now); video.setUpdateTime(now); String title = doc.select("div#video_title a").first().text().toString(); video.setTitle(title); video.setIdentificationCode(videoIdentificationCode); Elements rdElements = doc.select("div#video_date td.text"); if (CollectionUtils.isNotEmpty(rdElements)) { String releaseDate = rdElements.first().text().toString(); SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); try { Date date = sdf.parse(releaseDate); video.setReleaseDate(date); } catch (ParseException e) { } } Elements dmElements = doc.select("div#video_length span.text"); if (CollectionUtils.isNotEmpty(dmElements)) { String durationMinutes = dmElements.first().text().toString(); video.setDurationMinutes(Integer.valueOf(durationMinutes)); } Elements dElements = doc.select("div#video_director td.text"); if (CollectionUtils.isNotEmpty(dElements)) { String director = dElements.first().text().toString(); video.setDirector(director); } Elements pElements = doc.select("div#video_maker td.text"); if (CollectionUtils.isNotEmpty(pElements)) { String producer = pElements.first().text().toString(); video.setProducer(producer); } Elements disElements = doc.select("div#video_label td.text"); if 
(CollectionUtils.isNotEmpty(disElements)) { String distributor = disElements.first().text().toString(); video.setDistributor(distributor); } Elements countElements = doc.select("div#video_favorite_edit span"); if (CollectionUtils.isNotEmpty(countElements)) { Elements countWantedElements = countElements.select("#subscribed a"); if (CollectionUtils.isNotEmpty(countWantedElements)) { String countWanted = countWantedElements.first().text(); try { video.setCountWanted(Integer.valueOf(countWanted)); } catch (Exception e) { } } Elements countWatchedElements = countElements.select("#watched a"); if (CollectionUtils.isNotEmpty(countWatchedElements)) { String countWatched = countWatchedElements.first().text(); try { video.setCountWatched(Integer.valueOf(countWatched)); } catch (Exception e) { } } Elements countOwnedElements = countElements.select("#owned a"); if (CollectionUtils.isNotEmpty(countOwnedElements)) { String countOwned = countOwnedElements.first().text(); try { video.setCountOwned(Integer.valueOf(countOwned)); } catch (Exception e) { } } } Elements sElements = doc.select("div#video_review td.text span.score"); if (CollectionUtils.isNotEmpty(sElements)) { String score = sElements.first().text().toString(); score = StringUtils.replace(score, "(", ""); score = StringUtils.replace(score, ")", ""); if (StringUtils.isNotBlank(score)) { try { video.setScore(Float.valueOf(score)); } catch (Exception e) { } } } Elements actressElements = doc.select("div#video_cast span.star"); if (CollectionUtils.isNotEmpty(actressElements)) { if (actressElements.size() <= 1) { video.setSingleFemaleFlag(true); } else { video.setSingleFemaleFlag(false); } } videoMapper.insertSelective(video); int videoId = videoMapper.queryByVideo(video).getId(); logger.info("handle " + videoId + "\n" + JSON.toJSONString(video)); if (CollectionUtils.isNotEmpty(actressElements)) { actressElements.stream().forEach(a -> { String aName = a.text().toString().trim(); if (StringUtils.isNotBlank(aName)) { Actress 
queryActress = new Actress(); queryActress.setName(aName); Actress actress = actressMapper.queryByActress(queryActress); if (null != actress) { VideoActress va = new VideoActress(); va.setActressCode(actress.getCode()); va.setVideoId(videoId); videoActressMapper.insertSelective(va); } else { actress = new Actress(); actress.setName(aName); actressMapper.insertSelective(actress); int actressId = actressMapper.queryByActress(actress).getId(); VideoActress va = new VideoActress(); va.setActressCode(actress.getCode()); va.setVideoId(videoId); videoActressMapper.insertSelective(va); } } }); } Elements categoryElements = doc.select("div#video_genres span.genre"); if (CollectionUtils.isNotEmpty(categoryElements)) { categoryElements.stream().forEach(c -> { String cDescription = c.text().toString().trim(); if (StringUtils.isNotBlank(cDescription)) { Category queryCategory = new Category(); queryCategory.setSubtype(cDescription); Category category = categoryMapper.queryByCategory(queryCategory); if (null != category) { VideoCategory vc = new VideoCategory(); vc.setCategoryId(category.getId()); vc.setCategoryDescription(category.getSubtype()); vc.setVideoId(videoId); videoCategoryMapper.insertSelective(vc); } else { category = new Category(); category.setSubtype(cDescription); categoryMapper.insertSelective(category); int categoryId = categoryMapper.queryByCategory(category).getId(); VideoCategory vc = new VideoCategory(); vc.setCategoryId(categoryId); vc.setCategoryDescription(category.getSubtype()); vc.setVideoId(videoId); videoCategoryMapper.insertSelective(vc); } } }); } } }
From source file:com.app.rest.ExperianIntegrationService.java
@POST @Path("/questionForCustomer") @Produces({ MediaType.APPLICATION_JSON }) @Consumes(MediaType.APPLICATION_JSON)/* w w w. j a va 2s . c om*/ public ResponseModel getQuestion(String inputJsonObj) { ResponseModel responseMap = new ResponseModel(); //String requestParams = (String) inputJsonObj.get("input"); String message = ""; String jsessionId2 = ""; String responseJson = null; String logMarker = null; try { Map map = parseJson(inputJsonObj); logMarker = map.get("LOG_MARKER").toString(); jsessionId2 = map.get("jsessionId2").toString(); logger.info("getQuestion ~ " + (map.get("LOG_MARKER") == null ? "NOT_GIVEN" : map.get("LOG_MARKER")) + " ~ jsessionId2: " + (jsessionId2 == null ? "null" : jsessionId2) + "~Log Marker 1"); while (true) { ArrayList<NameValuePair> params = new ArrayList<NameValuePair>(); params.add(new BasicNameValuePair("answer", map.get("answer").toString())); params.add(new BasicNameValuePair("questionId", map.get("qid").toString())); params.add(new BasicNameValuePair("stgOneHitId", map.get("stgOneHitId").toString())); params.add(new BasicNameValuePair("stgTwoHitId", map.get("stgTwoHitId").toString())); String request = getQuery(params); Map questionMap = HttpConnection.generateQuestionForConsumer(jsessionId2, request); responseJson = (String) questionMap.get("responseJson"); if (responseJson.equalsIgnoreCase("passedReport")) { String pdfData = (String) questionMap.get("showHtmlReportForCreditReport"); Document doc = Jsoup.parse(pdfData); Element input = doc.select("input[name=xmlResponse]").first(); String response = input.attr("value"); responseMap.setErrorMessage("Success"); responseMap.setXmlResponse(response); responseMap.setTotalResponse(pdfData); } if (responseJson.equalsIgnoreCase("next")) { questionMap.put("jsessionId2", jsessionId2); responseMap.setResponseMap(questionMap); } if (responseJson.equalsIgnoreCase("systemError")) { responseMap.setErrorMessage("systemError"); } if (responseJson.equalsIgnoreCase("inCorrectAnswersGiven")) { 
responseMap.setErrorMessage("inCorrectAnswersGiven"); } if (responseJson.equalsIgnoreCase("insufficientQuestion")) { responseMap.setErrorMessage("insufficientQuestion"); } if (responseJson.equalsIgnoreCase("error") || responseJson.equalsIgnoreCase("creditReportEmpty")) { responseMap.setErrorMessage("creditReportEmpty"); } return responseMap; } } catch (Exception e) { logger.info("getQuestion ~ " + (logMarker == null ? "NOT_GIVEN" : logMarker) + " ~ jsessionId2: " + (jsessionId2 == null ? "null" : jsessionId2) + "~Log Marker 2"); responseMap.setErrorMessage("Error occured"); responseMap.setExceptionString(e.toString()); return responseMap; } }
From source file:com.amastigote.xdu.query.module.EduSystem.java
private void preLogin() throws IOException { URL url = new URL(SYS_HOST); HttpURLConnection httpURLConnection = (HttpURLConnection) url.openConnection(); httpURLConnection.setInstanceFollowRedirects(false); httpURLConnection.connect();// ww w.j a v a 2 s . c om List<String> cookies_to_set_a = httpURLConnection.getHeaderFields().get("Set-Cookie"); for (String e : cookies_to_set_a) if (e.contains("JSESSIONID=")) SYS_JSESSIONID = e.substring(e.indexOf("JSESSIONID=") + 11, e.indexOf(";")); httpURLConnection.disconnect(); httpURLConnection = (HttpURLConnection) url.openConnection(); httpURLConnection.setInstanceFollowRedirects(true); httpURLConnection.connect(); List<String> cookies_to_set = httpURLConnection.getHeaderFields().get("Set-Cookie"); for (String e : cookies_to_set) { if (e.contains("route=")) ROUTE = e.substring(6); else if (e.contains("JSESSIONID=")) LOGIN_JSESSIONID = e.substring(11, e.indexOf(";")); else if (e.contains("BIGipServeridsnew.xidian.edu.cn=")) BIGIP_SERVER_IDS_NEW = e.substring(32, e.indexOf(";")); } BufferedReader bufferedReader = new BufferedReader( new InputStreamReader(httpURLConnection.getInputStream())); String html = ""; String temp; while ((temp = bufferedReader.readLine()) != null) { html += temp; } Document document = Jsoup.parse(html); Elements elements = document.select("input[type=hidden]"); for (Element element : elements) { switch (element.attr("name")) { case "lt": LOGIN_PARAM_lt = element.attr("value"); break; case "execution": LOGIN_PARAM_execution = element.attr("value"); break; case "_eventId": LOGIN_PARAM__eventId = element.attr("value"); break; case "rmShown": LOGIN_PARAM_rmShown = element.attr("value"); break; } } }
From source file:de.geeksfactory.opacclient.apis.Littera.java
/**
 * Loads the detail page of one item and converts it into a {@link DetailledItem}.
 *
 * <p>The item gets a single {@link Copy} whose status, return date and reservations are read
 * from the availability table. Every row of the detail table becomes a {@link Detail}; a row
 * with a blank description is appended to the previous detail as a continuation line.
 *
 * @param id         the item id, appended to the detail URL
 * @param homebranch not used by this implementation
 * @return the populated item
 * @throws IOException        if the page cannot be fetched
 * @throws OpacErrorException declared by the overridden method
 */
@Override
public DetailledItem getResultById(String id, String homebranch) throws IOException, OpacErrorException {
    if (!initialised) {
        start();
    }
    final String html = httpGet(getApiUrl() + "&view=detail&id=" + id, getDefaultEncoding());
    final Document doc = Jsoup.parse(html);
    final Element detailData = doc.select(".detailData").first();
    final Element detailTable = detailData.select("table.titel").first();
    final Element availabilityTable = doc.select(".bibliothek table").first();

    final DetailledItem result = new DetailledItem();
    final Copy copy = new Copy();
    result.addCopy(copy);
    result.setId(id);
    result.setCover(getCover(doc));
    result.setTitle(detailData.select("h3").first().text());
    result.setMediaType(MEDIA_TYPES.get(getCellContent(detailTable, "Medienart|Type of media")));

    // "Verf\u00fcgbar" needs its umlaut: without it the German label can never match.
    copy.setStatus(getCellContent(availabilityTable, "Verf\u00fcgbar|Available"));
    copy.setReturnDate(parseCopyReturn(getCellContent(availabilityTable, "Exemplare verliehen|Copies lent")));
    copy.setReservations(getCellContent(availabilityTable, "Reservierungen|Reservations"));

    for (final Element tr : detailTable.select("tr")) {
        if (tr.children().size() < 2) {
            // A row without both a description cell and a content cell carries no detail.
            continue;
        }
        final String desc = tr.child(0).text();
        final String content = tr.child(1).text();
        if (desc != null && !desc.trim().isEmpty()) {
            result.addDetail(new Detail(desc, content));
        } else if (!result.getDetails().isEmpty()) {
            // Blank description: this row continues the previous detail.
            final Detail lastDetail = result.getDetails().get(result.getDetails().size() - 1);
            lastDetail.setHtml(true);
            lastDetail.setContent(lastDetail.getContent() + "\n" + content);
        }
    }
    return result;
}
From source file:com.amastigote.xdu.query.module.EduSystem.java
@Override public boolean login(String username, String password) throws IOException { preLogin();//w w w. ja v a 2 s . com URL url = new URL(LOGIN_HOST + LOGIN_SUFFIX); HttpURLConnection httpURLConnection_a = (HttpURLConnection) url.openConnection(); httpURLConnection_a.setRequestMethod("POST"); httpURLConnection_a.setUseCaches(false); httpURLConnection_a.setInstanceFollowRedirects(false); httpURLConnection_a.setDoOutput(true); String OUTPUT_DATA = "username="; OUTPUT_DATA += username; OUTPUT_DATA += "&password="; OUTPUT_DATA += password; OUTPUT_DATA += "&submit="; OUTPUT_DATA += "<=" + LOGIN_PARAM_lt; OUTPUT_DATA += "&execution=" + LOGIN_PARAM_execution; OUTPUT_DATA += "&_eventId=" + LOGIN_PARAM__eventId; OUTPUT_DATA += "&rmShown=" + LOGIN_PARAM_rmShown; httpURLConnection_a.setRequestProperty("Cookie", "route=" + ROUTE + "; org.springframework.web.servlet.i18n.CookieLocaleResolver.LOCALE=zh_CN; JSESSIONID=" + LOGIN_JSESSIONID + "; BIGipServeridsnew.xidian.edu.cn=" + BIGIP_SERVER_IDS_NEW + ";"); httpURLConnection_a.connect(); OutputStreamWriter outputStreamWriter = new OutputStreamWriter(httpURLConnection_a.getOutputStream(), "UTF-8"); outputStreamWriter.write(OUTPUT_DATA); outputStreamWriter.flush(); outputStreamWriter.close(); BufferedReader bufferedReader = new BufferedReader( new InputStreamReader(httpURLConnection_a.getInputStream())); String html = ""; String temp; while ((temp = bufferedReader.readLine()) != null) { html += temp; } Document document = Jsoup.parse(html); Elements elements = document.select("a"); if (elements.size() == 0) return false; String SYS_LOCATION = elements.get(0).attr("href"); URL sys_location = new URL(SYS_LOCATION); HttpURLConnection httpUrlConnection_b = (HttpURLConnection) sys_location.openConnection(); httpUrlConnection_b.setInstanceFollowRedirects(false); httpUrlConnection_b.connect(); List<String> cookies_to_set = httpUrlConnection_b.getHeaderFields().get("Set-Cookie"); for (String e : cookies_to_set) if 
(e.contains("JSESSIONID=")) SYS_JSESSIONID = e.substring(11, e.indexOf(";")); httpURLConnection_a.disconnect(); httpUrlConnection_b.disconnect(); return checkIsLogin(username); }
From source file:de.geeksfactory.opacclient.apis.Littera.java
protected SearchRequestResult executeSearch(List<SearchQuery> query, int pageIndex) throws IOException, OpacErrorException, JSONException { final String searchUrl; if (!initialised) { start();// w w w. j a v a 2s.c o m } try { searchUrl = buildSearchUrl(query, pageIndex); } catch (URISyntaxException e) { throw new RuntimeException(e); } final String html = httpGet(searchUrl, getDefaultEncoding()); final Document doc = Jsoup.parse(html); final Element navigation = doc.select(".result_view .navigation").first(); final int totalResults = navigation != null ? parseTotalResults(navigation.text()) : 0; final Element ul = doc.select(".result_view ul.list").first(); final List<SearchResult> results = new ArrayList<>(); for (final Element li : ul.children()) { if (li.hasClass("zugangsmonat")) { continue; } final SearchResult result = new SearchResult(); final Element title = li.select(".titelinfo a").first(); result.setId(getQueryParamsFirst(title.attr("href")).get("id")); result.setInnerhtml(title.text() + "<br>" + title.parent().nextElementSibling().text()); result.setNr(results.size()); result.setPage(pageIndex); result.setType(MEDIA_TYPES.get(li.select(".statusinfo .ma").text())); result.setCover(getCover(li)); final String statusImg = li.select(".status img").attr("src"); result.setStatus(statusImg.contains("-yes") ? SearchResult.Status.GREEN : statusImg.contains("-no") ? SearchResult.Status.RED : null); results.add(result); } return new SearchRequestResult(results, totalResults, pageIndex); }
From source file:org.abondar.experimental.eventsearch.EventFinder.java
public void getEvent(String eventId, String evType) { try {//w w w. ja va 2 s . c o m Document dc = Jsoup.connect("https://afisha.yandex.ru/msk/events/" + eventId + "/").get(); Event eb = new Event(); eb.setEventID(eventId); eb.setCategory(eventTypes.get(evType)); Elements elems = dc.select("meta"); for (Element e : elems) { if (e.attributes().get("property").contains("og:description")) { eb.setDescription(e.attributes().get("content")); } } elems = dc.select("title"); for (Element e : elems) { eb.setName(e.html().substring(0, e.html().indexOf(""))); } elems = dc.select("a[href]"); for (Element e : elems) { for (Attribute attr : e.attributes().asList()) { if (attr.getValue().contains("/msk/places/")) { eb.setPlace(getEventPlaces(attr.getValue())); } } } elems = dc.select("tr[id]"); for (Element e : elems) { for (Attribute attr : e.attributes().asList()) { if (attr.getValue().contains("f")) { eb.setDate(e.children().first().html()); try { Element e1 = e.child(1).children().first(); Element e2 = e1.children().first(); Element e3 = e2.children().first(); Element e4 = e3.children().first(); eb.setTime(e4.html()); } catch (NullPointerException ex) { Element e1 = e.child(2).children().first(); Element e2 = e1.children().first(); Element e3 = e2.children().first(); Element e4 = e3.children().first(); eb.setTime(e4.html()); } } } } geoCode(eb); formJson(eb); } catch (IOException ex) { Logger.getLogger(EventFinder.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:org.dataconservancy.ui.it.support.ListMetadataFormatRequest.java
/** * Attempts to parse the HTML supplied by {@code htmlBody} into * {@link org.dataconservancy.ui.stripes.UiConfigurationActionBean.MetaDataFormatTransport} objects. Typically the * {@code htmlBody} will be produced by retrieving the content of {@link UiUrlConfig#getListMetadataFormatUrl() * listing metadata formats}.// ww w . j a v a 2s . co m * <p/> * JSoup is used internally to parse the HTML into a DOM tree. The <table> is retrieved by selecting the div * identifier {@link #METADATA_FORMAT_TABLE}, and each row in the table parsed and converted to a * {@link org.dataconservancy.ui.stripes.UiConfigurationActionBean.MetaDataFormatTransport}. * * @param htmlBody the InputStream containing an HTML document with Metadata Formats * @return a {@code List}, in document order, of the Metadata Formats in the system */ public List<UiConfigurationActionBean.MetaDataFormatTransport> listFormats(InputStream htmlBody) { final List<UiConfigurationActionBean.MetaDataFormatTransport> results = new ArrayList<UiConfigurationActionBean.MetaDataFormatTransport>(); final Document dom; try { final String html = IOUtils.toString(htmlBody); dom = Jsoup.parse(html); } catch (IOException e) { throw new RuntimeException(e.getMessage(), e); } final Elements table = dom.select("table#" + METADATA_FORMAT_TABLE); assertNotNull(table); assertEquals(1, table.size()); final Elements rows = table.get(0).getElementsByTag("tr"); assertNotNull(rows); assertTrue(rows.size() > 1); // Skip the first row, it's a header for (int i = 1; i < rows.size(); i++) { final Element row = rows.get(i); results.add(toMetadataTransport(row)); } return results; }
From source file:com.bdx.rainbow.service.etl.analyze.SYJHttpAnalyze.java
/** * ??,?// w w w .j a v a 2 s . c o m */ @Override public Collection<HttpSeed> findSeed(Collection<HttpSeed> seeds) throws Exception { if (CollectionUtils.isEmpty(seeds)) { return null; } Collection<HttpSeed> seedGroups = new ArrayList<HttpSeed>(); // ?HTMLA for (HttpSeed seed : seeds) { Document doc = parse(seed.getHtml()); Elements drug_elements = doc.select("a[href]"); if (drug_elements.isEmpty()) { return null; } for (Element drug_e : drug_elements) { String href_string = drug_e.attr("href"); String uri = href_string.substring(href_string.indexOf("'") + 1, href_string.lastIndexOf("'")); if (StringUtils.isBlank(uri)) { continue; } seedGroups.add(initDetailHttpSeed(DOMAIN + uri)); } } return seedGroups; }