List of usage examples for org.jsoup.nodes Element attr
public String attr(String attributeKey)
From source file:faescapeplan.FAEscapePlanUI.java
private ArrayList<String> indexSection(String section) { ArrayList<String> idList = new ArrayList<>(); boolean itemsRemain = true; int pageCount = 1; updateTextLog("Indexing " + section + "..."); while (itemsRemain) { try {/*from w ww . j av a2 s . c om*/ Document currentPage = Jsoup .connect("http://www.furaffinity.net/" + section + "/" + userData.getName() + "/" + pageCount + "/") // TEST .timeout(10000).userAgent(USER_AGENT).cookies(userData.getCookies()).get(); if (currentPage.getElementById("no-images") == null) { updateTextLog("Indexing page " + pageCount); Elements elementList = currentPage.getElementsByAttributeValueMatching("id", "sid_\\d+"); for (Element item : elementList) { String cleanId = item.attr("id").replace("sid_", ""); idList.add(cleanId); } pageCount++; } else { itemsRemain = false; updateTextLog("Finished indexing " + section); } } catch (HttpStatusException ex) { Logger.getLogger(FAEscapePlanUI.class.getName()).log(Level.SEVERE, null, ex); System.out.println("Could not connect to FA"); // DEBUG break; } catch (SocketTimeoutException ex) { Logger.getLogger(FAEscapePlanUI.class.getName()).log(Level.SEVERE, null, ex); System.out.println("Connection timed out"); // DEBUG break; } catch (IOException ex) { Logger.getLogger(FAEscapePlanUI.class.getName()).log(Level.SEVERE, null, ex); System.out.println("An IO Exception occurred while indexing " + section); // DEBUG break; } } return idList; }
From source file:de.geeksfactory.opacclient.apis.Zones.java
@Override public List<SearchField> getSearchFields() throws IOException { if (!initialised) start();/*from w w w . j a v a2 s .co m*/ List<SearchField> fields = new ArrayList<>(); String html = httpGet(opac_url + "/APS_ZONES?fn=AdvancedSearch&Style=Portal3&SubStyle=&Lang=GER" + "&ResponseEncoding=utf-8", getDefaultEncoding()); Document doc = Jsoup.parse(html); // find text fields Elements txt_opts = doc.select("#formSelectTerm_1 option"); for (Element opt : txt_opts) { TextSearchField field = new TextSearchField(); field.setId(opt.attr("value")); field.setHint(""); field.setDisplayName(opt.text()); fields.add(field); } // find filters String filtersQuery = version18 ? ".inSearchLimits .floatingBox" : ".TabRechAv .limitBlock"; Elements filters = doc.select(filtersQuery); int i = 0; for (Element filter : filters) { DropdownSearchField dropdown = new DropdownSearchField(); dropdown.addDropdownValue("", "Alle"); // All dropdowns use "q.limits.limit" as URL param, but they must not have the same ID dropdown.setId("dropdown_" + i); if (version18) { dropdown.setDisplayName(filter.select("tr").get(0).text().trim()); Elements opts = filter.select("tr").get(1).select("table td:has(input)"); for (Element opt : opts) { dropdown.addDropdownValue(opt.select("input").attr("value"), opt.text().trim()); } } else { dropdown.setDisplayName(filter.parent().previousElementSibling().text().trim()); Elements opts = filter.select(".limitChoice label"); for (Element opt : opts) { dropdown.addDropdownValue(opt.attr("for"), opt.text().trim()); } } fields.add(dropdown); i++; } return fields; }
From source file:de.geeksfactory.opacclient.apis.Zones22.java
private DetailledItem parse_result(String id, String html) throws IOException { Document doc = Jsoup.parse(html); DetailledItem result = new DetailledItem(); result.setTitle(""); boolean title_is_set = false; result.setId(id);/*from www. j a v a2s . c o m*/ Elements detaildiv = doc.select("div.record-item-new"); Elements detailtrs1 = doc.select(".DetailDataCell table table:not(.inRecordHeader) tr"); for (int i = 0; i < detailtrs1.size(); i++) { Element tr = detailtrs1.get(i); int s = tr.children().size(); if (tr.child(0).text().trim().equals("Titel") && !title_is_set) { result.setTitle(tr.child(s - 1).text().trim()); title_is_set = true; } else if (s > 1) { Element valchild = tr.child(s - 1); if (valchild.select("table").isEmpty()) { String val = valchild.text().trim(); if (val.length() > 0) result.addDetail(new Detail(tr.child(0).text().trim(), val)); } } } for (Element a : doc.select("a.SummaryActionLink")) { if (a.text().contains("Vormerken")) { result.setReservable(true); result.setReservation_info(a.attr("href")); } } if (!detaildiv.isEmpty()) { for (int i = 0; i < detaildiv.size(); i++) { Element dd = detaildiv.get(i); String text = ""; for (Node node : dd.childNodes()) { if (node instanceof TextNode) { String snip = ((TextNode) node).text(); if (snip.length() > 0) text += snip; } else if (node instanceof Element) { if (((Element) node).tagName().equals("br")) text += "\n"; else { String snip = ((Element) node).text().trim(); if (snip.length() > 0) text += snip; } } } result.addDetail(new Detail("", text)); } } if (doc.select("span.z3988").size() > 0) { // Sometimes there is a <span class="Z3988"> item which provides // data in a standardized format. 
String z3988data = doc.select("span.z3988").first().attr("title").trim(); for (String pair : z3988data.split("\\&")) { String[] nv = pair.split("=", 2); if (nv.length == 2) { if (!nv[1].trim().equals("")) { if (nv[0].equals("rft.btitle") && result.getTitle().length() == 0) { result.setTitle(nv[1]); } else if (nv[0].equals("rft.atitle") && result.getTitle().length() == 0) { result.setTitle(nv[1]); } else if (nv[0].equals("rft.au")) { result.addDetail(new Detail("Author", nv[1])); } } } } } Elements copydivs = doc.select(".DetailDataCell div[id^=stock_]"); String pop = ""; for (int i = 0; i < copydivs.size(); i++) { Element div = copydivs.get(i); if (div.attr("id").startsWith("stock_head")) { pop = div.text().trim(); continue; } Map<String, String> copy = new HashMap<String, String>(); // This is getting very ugly - check if it is valid for libraries // which are not // Hamburg. int j = 0; for (Node node : div.childNodes()) { try { if (node instanceof Element) { if (((Element) node).tag().getName().equals("br")) { copy.put(DetailledItem.KEY_COPY_BRANCH, pop); result.addCopy(copy); j = -1; } else if (((Element) node).tag().getName().equals("b") && j == 1) { copy.put(DetailledItem.KEY_COPY_LOCATION, ((Element) node).text()); } else if (((Element) node).tag().getName().equals("b") && j > 1) { copy.put(DetailledItem.KEY_COPY_STATUS, ((Element) node).text()); } j++; } else if (node instanceof TextNode) { if (j == 0) copy.put(DetailledItem.KEY_COPY_DEPARTMENT, ((TextNode) node).text()); if (j == 2) copy.put(DetailledItem.KEY_COPY_BARCODE, ((TextNode) node).getWholeText().trim().split("\n")[0].trim()); if (j == 6) { String text = ((TextNode) node).text().trim(); copy.put(DetailledItem.KEY_COPY_RETURN, text.substring(text.length() - 10)); } j++; } } catch (Exception e) { e.printStackTrace(); } } } return result; }
From source file:de.geeksfactory.opacclient.apis.IOpac.java
/**
 * Turns a form into a relative GET URL: the form's {@code action} followed by one
 * {@code name=value} pair per {@code input} element.
 *
 * <p>A single leading slash of the action is dropped. Values are URL-encoded as UTF-8;
 * names are appended as they appear in the document.
 *
 * @param form the form element to serialise
 * @return the action plus query string, without a leading slash
 * @throws UnsupportedEncodingException declared by {@link URLEncoder#encode(String, String)}
 */
private String generateQuery(Element form) throws UnsupportedEncodingException {
    String action = form.attr("action");
    StringBuilder builder = new StringBuilder();
    // attr() yields "" for a missing attribute, so an unconditional substring(1) would throw;
    // it would also chop a real character off an action that has no leading slash.
    builder.append(action.startsWith("/") ? action.substring(1) : action);

    // If the action already carries a query string, further parameters must be joined with '&'.
    String separator = action.indexOf('?') >= 0 ? "&" : "?";
    for (Element input : form.select("input")) {
        builder.append(separator)
                .append(input.attr("name"))
                .append("=")
                .append(URLEncoder.encode(input.attr("value"), "UTF-8"));
        separator = "&";
    }
    return builder.toString();
}
From source file:de.geeksfactory.opacclient.apis.IOpac.java
@Override public ReservationResult reservation(DetailledItem item, Account account, int useraction, String selection) throws IOException { String reservation_info = item.getReservation_info(); // STEP 1: Login page String html = httpGet(opac_url + "/" + reservation_info, getDefaultEncoding()); Document doc = Jsoup.parse(html); if (doc.select("table").first().text().contains("kann nicht")) { return new ReservationResult(MultiStepResult.Status.ERROR, doc.select("table").first().text().trim()); }//w ww . j a v a 2 s . co m if (doc.select("form[name=form1]").size() == 0) { return new ReservationResult(MultiStepResult.Status.ERROR); } Element form = doc.select("form[name=form1]").first(); List<BasicNameValuePair> params = new ArrayList<>(); params.add(new BasicNameValuePair("sleKndNr", account.getName())); params.add(new BasicNameValuePair("slePw", account.getPassword())); params.add(new BasicNameValuePair("pshLogin", "Reservieren")); for (Element input : form.select("input[type=hidden]")) { params.add(new BasicNameValuePair(input.attr("name"), input.attr("value"))); } // STEP 2: Confirmation page html = httpPost(opac_url + "/cgi-bin/di.exe", new UrlEncodedFormEntity(params), getDefaultEncoding()); doc = Jsoup.parse(html); if (doc.select("form[name=form1]").size() > 0) { // STEP 3: There is another confirmation needed form = doc.select("form[name=form1]").first(); html = httpGet(opac_url + "/" + generateQuery(form), getDefaultEncoding()); doc = Jsoup.parse(html); } if (doc.text().contains("fehlgeschlagen") || doc.text().contains("Achtung") || doc.text().contains("nicht m")) { return new ReservationResult(MultiStepResult.Status.ERROR, doc.select("table").first().text().trim()); } else { return new ReservationResult(MultiStepResult.Status.OK); } }
From source file:com.jimplush.goose.ContentExtractor.java
/**
 * Adds {@code addToScore} to the value stored in the node's {@code gravityScore} attribute
 * and writes the sum back to that attribute. A missing, empty or non-numeric attribute
 * counts as 0.
 *
 * @param node       the element whose {@code gravityScore} attribute is updated
 * @param addToScore the score to add to the node
 */
private void updateScore(Element node, int addToScore) {
    String stored = node.attr("gravityScore");
    int base = 0;
    if (!string.isNullOrEmpty(stored)) {
        try {
            base = Integer.parseInt(stored);
        } catch (NumberFormatException ignored) {
            // A value that is not an integer is treated like a missing one.
            base = 0;
        }
    }
    node.attr("gravityScore", String.valueOf(base + addToScore));
}
From source file:com.jimplush.goose.ContentExtractor.java
/** * stores how many decent nodes are under a parent node * * @param node/*w w w. j a v a 2s .c o m*/ * @param addToCount */ private void updateNodeCount(Element node, int addToCount) { int currentScore; try { String countString = node.attr("gravityNodes"); currentScore = string.isNullOrEmpty(countString) ? 0 : Integer.parseInt(countString); } catch (NumberFormatException e) { currentScore = 0; } int newScore = currentScore + addToCount; node.attr("gravityNodes", Integer.toString(newScore)); }
From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java
/**
 * Creates the search field that corresponds to a form control.
 *
 * <p>An {@code input} element of type {@code text} yields a text field; a {@code select}
 * element yields a dropdown with one entry per {@code option}. In both cases the control's
 * {@code name} attribute becomes the field id.
 *
 * @param name  the display name of the field
 * @param hint  the hint, used for text fields only
 * @param input the form control to convert
 * @return the new field, or {@code null} for any other kind of element
 */
private SearchField createSearchField(String name, String hint, Element input) {
    String tag = input.tagName();

    if ("input".equals(tag) && "text".equals(input.attr("type"))) {
        TextSearchField textField = new TextSearchField();
        textField.setDisplayName(name);
        textField.setHint(hint);
        textField.setId(input.attr("name"));
        return textField;
    }

    if (!"select".equals(tag)) {
        return null;
    }

    DropdownSearchField dropdown = new DropdownSearchField();
    dropdown.setDisplayName(name);
    dropdown.setId(input.attr("name"));
    for (Element option : input.select("option")) {
        dropdown.addDropdownValue(option.attr("value"), option.text());
    }
    return dropdown;
}
From source file:sample.ui.mvc.MessageController.java
private String getBidId(Message message) { try {// w w w .j a v a 2 s .co m BasicCookieStore cookieStore = new BasicCookieStore(); CloseableHttpClient httpclient = HttpClients.custom().setDefaultCookieStore(cookieStore).build(); doLogin(cookieStore, httpclient, ZHANGDAIYIXIAN); // String bidName = message.getBidName(); // time // String mainUrl = "http://www.wujinsuo.cn:80/index.php"; HttpGet httpget = new HttpGet(mainUrl); httpget.addHeader("Accept", ACCEPT); httpget.addHeader("User-Agent", AGENT); ResponseHandler<String> responseHandler = new ResponseHandler<String>() { public String handleResponse(final HttpResponse response) throws ClientProtocolException, IOException { int status = response.getStatusLine().getStatusCode(); if (status >= 200 && status < 300) { HttpEntity entity = response.getEntity(); return entity != null ? EntityUtils.toString(entity) : null; } else { throw new ClientProtocolException("Unexpected response status: " + status); } } }; String resultString = httpclient.execute(httpget, responseHandler); // parse html Document doc = Jsoup.parse(resultString); Elements links = doc.select("a[href]"); Element aElement = null; for (Element e : links) { List<Node> childNode = e.childNodes(); if (childNode.size() != 1) continue; Node node = childNode.get(0); if ("span".equals(node.nodeName())) { String html = node.outerHtml(); logger.info(html); if (html.contains(bidName)) { // okle aElement = e; } } } if (aElement == null) { // retry return ""; } else { String href = aElement.attr("href"); String bidId = StringUtils.substringAfter(href, "id="); logger.info(bidId); return bidId; } } catch (ClientProtocolException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (URISyntaxException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return null; }
From source file:de.geeksfactory.opacclient.apis.Zones.java
/**
 * Loads the account overview: pending fees, validity date, lent items and reservations.
 *
 * <p>The links to the lent-items and reservations pages are taken from the summary cells
 * of the page returned by {@code login(acc)} and, if present, overridden by the
 * {@code a.AccountMenuLink} entries. If the reservations link is still unknown after that,
 * it is looked up on the lent-items page.
 *
 * @param acc the account to log in with
 * @return the account data, or {@code null} if the login yields no page or no link to the
 *         lent items can be found
 * @throws IOException        if one of the HTTP requests fails
 * @throws JSONException      declared by the signature; not thrown directly in this method body
 * @throws OpacErrorException declared by the signature; not thrown directly in this method body
 */
@Override
public AccountData account(Account acc) throws IOException, JSONException, OpacErrorException {
    Document login = login(acc);
    if (login == null) {
        return null;
    }

    AccountData res = new AccountData(acc.getId());

    String lentLink = null;
    String resLink = null;
    for (Element td : login.select(".AccountSummaryCounterNameCell, .AccountSummaryCounterNameCellStripe, "
            + ".CAccountDetailFieldNameCellStripe, .CAccountDetailFieldNameCell")) {
        String section = td.text().trim();
        // The value belonging to a name cell is read from the cell that follows it, if any.
        Element valueCell = td.nextElementSibling();
        if (section.contains("Entliehene Medien")) {
            lentLink = td.select("a").attr("href");
        } else if (section.contains("Vormerkungen")) {
            resLink = td.select("a").attr("href");
        } else if (valueCell != null && section.contains("Kontostand")) {
            res.setPendingFees(valueCell.text().trim());
        } else if (valueCell != null && section.matches("Ausweis g.ltig bis")) {
            res.setValidUntil(valueCell.text().trim());
        }
    }
    for (Element a : login.select("a.AccountMenuLink")) {
        if (a.text().contains("Ausleihen")) {
            lentLink = a.attr("href");
        } else if (a.text().contains("Vormerkungen")) {
            resLink = a.attr("href");
        }
    }
    if (lentLink == null) {
        return null;
    }

    List<LentItem> lentItems = new ArrayList<>();
    String lentUrl = opac_url + "/" + lentLink.replace("utf-8?Method", "utf-8&Method");
    String lentHtml = httpGet(lentUrl, getDefaultEncoding());
    Document lentDoc = Jsoup.parse(lentHtml);
    lentDoc.setBaseUri(lentUrl);
    loadMediaList(lentDoc, lentItems);
    res.setLent(lentItems);

    // In Koeln, the reservations link doesn't show on the overview page, so fall back to
    // the menu of the lent-items page.
    if (resLink == null) {
        for (Element a : lentDoc.select("a.AccountMenuLink")) {
            if (a.text().contains("Vormerkungen")) {
                resLink = a.attr("href");
            }
        }
    }

    List<ReservedItem> reservedItems = new ArrayList<>();
    if (resLink != null) {
        String resHtml = httpGet(opac_url + "/" + resLink, getDefaultEncoding());
        Document resDoc = Jsoup.parse(resHtml);
        loadResList(resDoc, reservedItems);
    }
    // Without a reservations link there is nothing to fetch; requesting ".../null" would be
    // wrong, so the list simply stays empty.
    res.setReservations(reservedItems);

    return res;
}