List of usage examples for org.jsoup.nodes Element childNodes
List<Node> childNodes()
To view the source code for org.jsoup.nodes Element childNodes, click the Source Link.
From source file:de.geeksfactory.opacclient.apis.Zones.java
/**
 * Parses the HTML of a detail page into a {@link DetailledItem}.
 *
 * <p>The method reads, in this order: the detail table rows (title and label/value pairs),
 * the "Vormerken" reservation link, the free-text {@code div.record-item-new} blocks, the
 * optional {@code span.z3988} metadata, the cover image and the copies listed in the
 * {@code div[id^=stock_]} elements.
 *
 * @param id   the item id; stored unchanged on the result
 * @param html the HTML of the detail page
 * @return the populated item; its title is the empty string when no title was found
 */
private DetailledItem parse_result(String id, String html) {
    Document doc = Jsoup.parse(html);

    DetailledItem result = new DetailledItem();
    result.setTitle("");
    boolean title_is_set = false;

    result.setId(id);

    String detailTrsQuery = version18 ? ".inRoundBox1 table table tr"
            : ".DetailDataCell table table:not(.inRecordHeader) tr";
    Elements detailtrs1 = doc.select(detailTrsQuery);
    for (Element tr : detailtrs1) {
        int s = tr.children().size();
        if (s == 0) {
            // A row without cells has neither label nor value; tr.child(0) would throw.
            continue;
        }
        String label = tr.child(0).text().trim();
        if (label.equals("Titel") && !title_is_set) {
            result.setTitle(tr.child(s - 1).text().trim());
            title_is_set = true;
        } else if (s > 1) {
            Element valchild = tr.child(s - 1);
            // Cells that contain a nested table are layout containers, not values.
            if (valchild.select("table").isEmpty()) {
                String val = valchild.text().trim();
                if (val.length() > 0) {
                    result.addDetail(new Detail(label, val));
                }
            }
        }
    }

    for (Element a : doc.select("a.SummaryActionLink")) {
        if (a.text().contains("Vormerken")) {
            result.setReservable(true);
            result.setReservation_info(a.attr("href"));
        }
    }

    // Free-text blocks: text nodes are taken verbatim, <br> becomes a line break and any
    // other element contributes its trimmed text.
    for (Element dd : doc.select("div.record-item-new")) {
        StringBuilder text = new StringBuilder();
        for (Node node : dd.childNodes()) {
            if (node instanceof TextNode) {
                text.append(((TextNode) node).text());
            } else if (node instanceof Element) {
                Element child = (Element) node;
                if (child.tagName().equals("br")) {
                    text.append("\n");
                } else {
                    text.append(child.text().trim());
                }
            }
        }
        result.addDetail(new Detail("", text.toString()));
    }

    Elements z3988 = doc.select("span.z3988");
    if (z3988.size() > 0) {
        // Sometimes there is a <span class="Z3988"> item which provides
        // data in a standardized format.
        String z3988data = z3988.first().attr("title").trim();
        for (String pair : z3988data.split("&")) {
            String[] nv = pair.split("=", 2);
            if (nv.length == 2 && !nv[1].trim().equals("")) {
                if (nv[0].equals("rft.btitle") && result.getTitle().length() == 0) {
                    result.setTitle(nv[1]);
                } else if (nv[0].equals("rft.atitle") && result.getTitle().length() == 0) {
                    result.setTitle(nv[1]);
                } else if (nv[0].equals("rft.au")) {
                    result.addDetail(new Detail("Author", nv[1]));
                }
            }
        }
    }

    // Cover
    Elements covers = doc.select(".BookCover, .LargeBookCover");
    if (covers.size() > 0) {
        result.setCover(covers.first().attr("src"));
    }

    Elements copydivs = doc.select("div[id^=stock_]");
    String pop = "";
    // Hoisted out of the loop: the formatter does not depend on the div being processed.
    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
    for (Element div : copydivs) {
        if (div.attr("id").startsWith("stock_head")) {
            // Heading div: its text is used as the branch of the copies that follow.
            pop = div.text().trim();
            continue;
        }

        // NOTE(review): the same Copy instance is re-used after each <br>, so a div that lists
        // several copies adds one shared object repeatedly - confirm whether that is intended.
        Copy copy = new Copy();

        // This is getting very ugly - check if it is valid for libraries which are not Hamburg.
        // Seems to also work in Kiel (Zones 1.8, checked 10.10.2015)
        int j = 0; // position of the current node within the current copy line
        for (Node node : div.childNodes()) {
            try {
                if (node instanceof Element) {
                    Element element = (Element) node;
                    String tag = element.tag().getName();
                    if (tag.equals("br")) {
                        copy.setBranch(pop);
                        result.addCopy(copy);
                        j = -1; // becomes 0 after the increment below
                    } else if (tag.equals("b") && j == 1) {
                        copy.setLocation(element.text());
                    } else if (tag.equals("b") && j > 1) {
                        copy.setStatus(element.text());
                    }
                    j++;
                } else if (node instanceof TextNode) {
                    TextNode textNode = (TextNode) node;
                    if (j == 0) {
                        copy.setDepartment(textNode.text());
                    }
                    if (j == 2) {
                        copy.setBarcode(textNode.getWholeText().trim().split("\n")[0].trim());
                    }
                    if (j == 6) {
                        String text = textNode.text().trim();
                        if (text.length() < 10) {
                            // Too short to end in a dd.MM.yyyy date. Skip the node without
                            // advancing j; previously the failing substring() had the same
                            // effect through the catch block below.
                            continue;
                        }
                        String date = text.substring(text.length() - 10);
                        try {
                            copy.setReturnDate(fmt.parseLocalDate(date));
                        } catch (IllegalArgumentException e) {
                            e.printStackTrace();
                        }
                    }
                    j++;
                }
            } catch (Exception e) {
                // Best effort: one malformed node must not abort the whole copy list.
                e.printStackTrace();
            }
        }
    }

    return result;
}
From source file:de.geeksfactory.opacclient.apis.Heidi.java
/**
 * Logs in and loads the account overview: pending fees, lent items and reservations.
 *
 * <p>Lent items are read from the rows of {@code table.kontopos} on the main account page;
 * reservations are collected from the {@code konto=v} and {@code konto=b} pages via
 * {@link #parse_reservations(String)}.
 *
 * @param account the account to log in with
 * @return the collected account data
 * @throws IOException        declared by the signature; presumably raised by the HTTP calls
 * @throws JSONException      declared by the signature
 * @throws OpacErrorException declared by the signature
 */
@Override
public AccountData account(Account account) throws IOException, JSONException, OpacErrorException {
    login(account);
    String html;
    Document doc;
    AccountData adata = new AccountData(account.getId());
    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);

    html = httpGet(opac_url + "/konto.cgi?sess=" + sessid, getDefaultEncoding());
    doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url + "/");

    for (Element td : doc.select("table.konto td")) {
        if (td.text().contains("Offene")) {
            // Reduce the cell text to "<amount> <currency>".
            String text = td.text().trim().replaceAll(
                    "Offene[^0-9]+Geb.+hren:[^0-9]+([0-9.,]+)[^0-9A-Z]*(|EUR|CHF|Fr.)", "$1 $2");
            adata.setPendingFees(text);
        }
    }

    List<LentItem> lent = new ArrayList<>();
    for (Element tr : doc.select("table.kontopos tr")) {
        if (tr.children().size() < 3) {
            // Cells 0..2 are read below; shorter rows cannot describe a lent item.
            continue;
        }
        LentItem item = new LentItem();
        Element desc = tr.child(1).select("label").first();
        String dates = tr.child(2).text().trim();

        Elements anchors = tr.child(1).select("a");
        if (anchors.size() > 0) {
            String kk = getQueryParamsFirst(anchors.first().absUrl("href")).get("katkey");
            item.setId(kk);
        }

        // first() returns null when the cell has no <label>; in that case there is
        // simply no author/title/barcode to read.
        if (desc != null) {
            int i = 0; // counts text nodes only
            for (Node node : desc.childNodes()) {
                if (node instanceof TextNode) {
                    String text = ((TextNode) node).text().trim();
                    if (i == 0) {
                        item.setAuthor(text);
                    } else if (i == 1) {
                        item.setTitle(text);
                    } else if (text.contains("Mediennummer")) {
                        item.setBarcode(text.replace("Mediennummer: ", ""));
                    }
                    i++;
                }
            }
        }

        Elements inputs = tr.child(0).select("input");
        if (inputs.size() == 1) {
            item.setProlongData(inputs.first().val());
            item.setRenewable(true);
        } else {
            Element span = tr.child(0).select("span").first();
            // Without a <span> there is no class to report; use the empty string that
            // attr() would return for a missing attribute.
            item.setProlongData(span != null ? "" + span.attr("class") : "");
            item.setRenewable(false);
        }

        String todate = dates;
        if (todate.contains("-")) {
            // "from - to": only the end date is the deadline.
            String[] datesplit = todate.split("-");
            if (datesplit.length > 1) {
                todate = datesplit[1].trim();
            }
        }
        if (todate.length() >= 10) {
            try {
                item.setDeadline(fmt.parseLocalDate(todate.substring(0, 10)));
            } catch (IllegalArgumentException e) {
                e.printStackTrace();
            }
        }

        lent.add(item);
    }
    adata.setLent(lent);

    List<ReservedItem> reservations = new ArrayList<>();
    html = httpGet(opac_url + "/konto.cgi?konto=v&sess=" + sessid, getDefaultEncoding());
    reservations.addAll(parse_reservations(html));
    html = httpGet(opac_url + "/konto.cgi?konto=b&sess=" + sessid, getDefaultEncoding());
    reservations.addAll(parse_reservations(html));

    adata.setReservations(reservations);

    return adata;
}
From source file:de.geeksfactory.opacclient.apis.Heidi.java
/**
 * Extracts the reservations from the rows of {@code table.kontopos}.
 *
 * <p>Per row: cell 0 may hold an {@code input} whose value becomes the cancel data, cell 1
 * holds the link (item id) and a {@code label} whose first two text nodes are author and
 * title, and cell 3 holds the ready date or status followed by the branch.
 *
 * @param html the HTML of a reservation page
 * @return the parsed reservations; rows with fewer than four cells are skipped
 */
protected List<ReservedItem> parse_reservations(String html) {
    Document doc = Jsoup.parse(html);
    List<ReservedItem> reservations = new ArrayList<>();
    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);

    for (Element tr : doc.select("table.kontopos tr")) {
        if (tr.children().size() < 4) {
            // Cells 0..3 are read below; tr.child(3) would throw on shorter rows.
            continue;
        }
        ReservedItem item = new ReservedItem();
        Element desc = tr.child(1).select("label").first();
        Element pos = tr.child(3);

        Elements anchors = tr.child(1).select("a");
        if (anchors.size() > 0) {
            String kk = getQueryParamsFirst(anchors.first().absUrl("href")).get("katkey");
            item.setId(kk);
        }
        Elements inputs = tr.child(0).select("input");
        if (inputs.size() > 0) {
            item.setCancelData(inputs.first().val());
        }

        // first() returns null when the cell has no <label>.
        if (desc != null) {
            int i = 0; // counts text nodes only
            for (Node node : desc.childNodes()) {
                if (node instanceof TextNode) {
                    String text = ((TextNode) node).text().trim();
                    if (i == 0) {
                        item.setAuthor(text);
                    } else if (i == 1) {
                        item.setTitle(text);
                    }
                    i++;
                }
            }
        }

        int i = 0; // counts text nodes only
        for (Node node : pos.childNodes()) {
            if (node instanceof TextNode) {
                String text = ((TextNode) node).text().trim();
                if (i == 0) {
                    // The first text node is either a date or, if it does not parse, a status.
                    // (The former extra check text.contains("") was always true.)
                    try {
                        item.setReadyDate(fmt.parseLocalDate(text));
                    } catch (IllegalArgumentException e) {
                        item.setStatus(text);
                    }
                } else if (i == 1) {
                    item.setBranch(text);
                }
                i++;
            }
        }

        reservations.add(item);
    }
    return reservations;
}
From source file:tr.edu.gsu.nerwip.retrieval.reader.wikipedia.WikipediaReader.java
/** * Extract text and hyperlinks from an element * supposingly containing only text.//w w w . java 2s .co m * * @param textElement * The element to be processed. * @param rawStr * The StringBuffer to contain the raw text. * @param linkedStr * The StringBuffer to contain the text with hyperlinks. */ private void processTextElement(Element textElement, StringBuilder rawStr, StringBuilder linkedStr) { // we process each element contained in the specified text element for (Node node : textElement.childNodes()) { // element node if (node instanceof Element) { Element element = (Element) node; String eltName = element.tag().getName(); // section headers: same thing if (eltName.equals(XmlNames.ELT_H2) || eltName.equals(XmlNames.ELT_H3) || eltName.equals(XmlNames.ELT_H4) || eltName.equals(XmlNames.ELT_H5) || eltName.equals(XmlNames.ELT_H6)) { processParagraphElement(element, rawStr, linkedStr); } // paragraphs inside paragraphs are processed recursively else if (eltName.equals(XmlNames.ELT_P)) { processParagraphElement(element, rawStr, linkedStr); } // superscripts are to be avoided else if (eltName.equals(XmlNames.ELT_SUP)) { // they are either external references or WP inline notes // cf. http://en.wikipedia.org/wiki/Template%3ACitation_needed } // small caps are placed before phonetic transcriptions of names, which we avoid else if (eltName.equals(XmlNames.ELT_SMALL)) { // we don't need them, and they can mess up NER tools } // we ignore certain types of span (phonetic trancription, WP buttons...) 
else if (eltName.equals(XmlNames.ELT_SPAN)) { processSpanElement(element, rawStr, linkedStr); } // hyperlinks must be included in the linked string, provided they are not external else if (eltName.equals(XmlNames.ELT_A)) { processHyperlinkElement(element, rawStr, linkedStr); } // lists else if (eltName.equals(XmlNames.ELT_UL)) { processListElement(element, rawStr, linkedStr, false); } else if (eltName.equals(XmlNames.ELT_OL)) { processListElement(element, rawStr, linkedStr, true); } else if (eltName.equals(XmlNames.ELT_DL)) { processDescriptionListElement(element, rawStr, linkedStr); } // list item else if (eltName.equals(XmlNames.ELT_LI)) { processTextElement(element, rawStr, linkedStr); } // divisions are just processed recursively else if (eltName.equals(XmlNames.ELT_DIV)) { processDivisionElement(element, rawStr, linkedStr); } // quotes are just processed recursively else if (eltName.equals(XmlNames.ELT_BLOCKQUOTE)) { processQuoteElement(element, rawStr, linkedStr); } // citation else if (eltName.equals(XmlNames.ELT_CITE)) { processParagraphElement(element, rawStr, linkedStr); } // other elements are considered as simple text else { String text = element.text(); rawStr.append(text); linkedStr.append(text); } } // text node else if (node instanceof TextNode) { // get the text TextNode textNode = (TextNode) node; String text = textNode.text(); // if at the begining of a new line, or already preceeded by a space, remove leading spaces while (rawStr.length() > 0 && (rawStr.charAt(rawStr.length() - 1) == '\n' || rawStr.charAt(rawStr.length() - 1) == ' ') && text.startsWith(" ")) text = text.substring(1); // complete string buffers rawStr.append(text); linkedStr.append(text); } } }
From source file:de.geeksfactory.opacclient.apis.SISIS.java
protected DetailledItem parse_result(String html) throws IOException { Document doc = Jsoup.parse(html); doc.setBaseUri(opac_url);/*w w w. ja v a 2 s.c o m*/ String html2 = httpGet(opac_url + "/singleHit.do?methodToCall=activateTab&tab=showTitleActive", ENCODING); Document doc2 = Jsoup.parse(html2); doc2.setBaseUri(opac_url); String html3 = httpGet(opac_url + "/singleHit.do?methodToCall=activateTab&tab=showAvailabilityActive", ENCODING); Document doc3 = Jsoup.parse(html3); doc3.setBaseUri(opac_url); DetailledItem result = new DetailledItem(); try { result.setId(doc.select("#bibtip_id").text().trim()); } catch (Exception ex) { ex.printStackTrace(); } List<String> reservationlinks = new ArrayList<>(); for (Element link : doc3.select("#vormerkung a, #tab-content a")) { String href = link.absUrl("href"); Map<String, String> hrefq = getQueryParamsFirst(href); if (result.getId() == null) { // ID retrieval String key = hrefq.get("katkey"); if (key != null) { result.setId(key); break; } } // Vormerken if (hrefq.get("methodToCall") != null) { if (hrefq.get("methodToCall").equals("doVormerkung") || hrefq.get("methodToCall").equals("doBestellung")) { reservationlinks.add(href.split("\\?")[1]); } } } if (reservationlinks.size() == 1) { result.setReservable(true); result.setReservation_info(reservationlinks.get(0)); } else if (reservationlinks.size() == 0) { result.setReservable(false); } else { // TODO: Multiple options - handle this case! 
} if (doc.select(".data td img").size() == 1) { result.setCover(doc.select(".data td img").first().attr("abs:src")); try { downloadCover(result); } catch (Exception e) { } } if (doc.select(".aw_teaser_title").size() == 1) { result.setTitle(doc.select(".aw_teaser_title").first().text().trim()); } else if (doc.select(".data td strong").size() > 0) { result.setTitle(doc.select(".data td strong").first().text().trim()); } else { result.setTitle(""); } if (doc.select(".aw_teaser_title_zusatz").size() > 0) { result.addDetail(new Detail("Titelzusatz", doc.select(".aw_teaser_title_zusatz").text().trim())); } String title = ""; String text = ""; boolean takeover = false; Element detailtrs = doc2.select(".box-container .data td").first(); for (Node node : detailtrs.childNodes()) { if (node instanceof Element) { if (((Element) node).tagName().equals("strong")) { title = ((Element) node).text().trim(); text = ""; } else { if (((Element) node).tagName().equals("a") && (((Element) node).text().trim().contains("hier klicken") || title.equals("Link:"))) { text = text + node.attr("href"); takeover = true; break; } } } else if (node instanceof TextNode) { text = text + ((TextNode) node).text(); } } if (!takeover) { text = ""; title = ""; } detailtrs = doc2.select("#tab-content .data td").first(); if (detailtrs != null) { for (Node node : detailtrs.childNodes()) { if (node instanceof Element) { if (((Element) node).tagName().equals("strong")) { if (!text.equals("") && !title.equals("")) { result.addDetail(new Detail(title.trim(), text.trim())); if (title.equals("Titel:")) { result.setTitle(text.trim()); } text = ""; } title = ((Element) node).text().trim(); } else { if (((Element) node).tagName().equals("a") && (((Element) node).text().trim().contains("hier klicken") || title.equals("Link:"))) { text = text + node.attr("href"); } else { text = text + ((Element) node).text(); } } } else if (node instanceof TextNode) { text = text + ((TextNode) node).text(); } } } else { if 
(doc2.select("#tab-content .fulltitle tr").size() > 0) { Elements rows = doc2.select("#tab-content .fulltitle tr"); for (Element tr : rows) { if (tr.children().size() == 2) { Element valcell = tr.child(1); String value = valcell.text().trim(); if (valcell.select("a").size() == 1) { value = valcell.select("a").first().absUrl("href"); } result.addDetail(new Detail(tr.child(0).text().trim(), value)); } } } else { result.addDetail(new Detail(stringProvider.getString(StringProvider.ERROR), stringProvider.getString(StringProvider.COULD_NOT_LOAD_DETAIL))); } } if (!text.equals("") && !title.equals("")) { result.addDetail(new Detail(title.trim(), text.trim())); if (title.equals("Titel:")) { result.setTitle(text.trim()); } } for (Element link : doc3.select("#tab-content a")) { Map<String, String> hrefq = getQueryParamsFirst(link.absUrl("href")); if (result.getId() == null) { // ID retrieval String key = hrefq.get("katkey"); if (key != null) { result.setId(key); break; } } } for (Element link : doc3.select(".box-container a")) { if (link.text().trim().equals("Download")) { result.addDetail( new Detail(stringProvider.getString(StringProvider.DOWNLOAD), link.absUrl("href"))); } } Map<String, Integer> copy_columnmap = new HashMap<>(); // Default values copy_columnmap.put("barcode", 1); copy_columnmap.put("branch", 3); copy_columnmap.put("status", 4); Elements copy_columns = doc.select("#tab-content .data tr#bg2 th"); for (int i = 0; i < copy_columns.size(); i++) { Element th = copy_columns.get(i); String head = th.text().trim(); if (head.contains("Status")) { copy_columnmap.put("status", i); } if (head.contains("Zweigstelle")) { copy_columnmap.put("branch", i); } if (head.contains("Mediennummer")) { copy_columnmap.put("barcode", i); } if (head.contains("Standort")) { copy_columnmap.put("location", i); } if (head.contains("Signatur")) { copy_columnmap.put("signature", i); } } Pattern status_lent = Pattern.compile( "^(entliehen) bis ([0-9]{1,2}.[0-9]{1,2}.[0-9]{2," + "4}) 
\\(gesamte Vormerkungen: ([0-9]+)\\)$"); Pattern status_and_barcode = Pattern.compile("^(.*) ([0-9A-Za-z]+)$"); Elements exemplartrs = doc.select("#tab-content .data tr").not("#bg2"); DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN); for (Element tr : exemplartrs) { try { Copy copy = new Copy(); Element status = tr.child(copy_columnmap.get("status")); Element barcode = tr.child(copy_columnmap.get("barcode")); String barcodetext = barcode.text().trim().replace(" Wegweiser", ""); // STATUS String statustext; if (status.getElementsByTag("b").size() > 0) { statustext = status.getElementsByTag("b").text().trim(); } else { statustext = status.text().trim(); } if (copy_columnmap.get("status").equals(copy_columnmap.get("barcode"))) { Matcher matcher1 = status_and_barcode.matcher(statustext); if (matcher1.matches()) { statustext = matcher1.group(1); barcodetext = matcher1.group(2); } } Matcher matcher = status_lent.matcher(statustext); if (matcher.matches()) { copy.setStatus(matcher.group(1)); copy.setReservations(matcher.group(3)); copy.setReturnDate(fmt.parseLocalDate(matcher.group(2))); } else { copy.setStatus(statustext); } copy.setBarcode(barcodetext); if (status.select("a[href*=doVormerkung]").size() == 1) { copy.setResInfo(status.select("a[href*=doVormerkung]").attr("href").split("\\?")[1]); } String branchtext = tr.child(copy_columnmap.get("branch")).text().trim().replace(" Wegweiser", ""); copy.setBranch(branchtext); if (copy_columnmap.containsKey("location")) { copy.setLocation( tr.child(copy_columnmap.get("location")).text().trim().replace(" Wegweiser", "")); } if (copy_columnmap.containsKey("signature")) { copy.setShelfmark( tr.child(copy_columnmap.get("signature")).text().trim().replace(" Wegweiser", "")); } result.addCopy(copy); } catch (Exception ex) { ex.printStackTrace(); } } try { Element isvolume = null; Map<String, String> volume = new HashMap<>(); Elements links = doc.select(".data td a"); int elcount = 
links.size(); for (int eli = 0; eli < elcount; eli++) { List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(links.get(eli).attr("href")), "UTF-8"); for (NameValuePair nv : anyurl) { if (nv.getName().equals("methodToCall") && nv.getValue().equals("volumeSearch")) { isvolume = links.get(eli); } else if (nv.getName().equals("catKey")) { volume.put("catKey", nv.getValue()); } else if (nv.getName().equals("dbIdentifier")) { volume.put("dbIdentifier", nv.getValue()); } } if (isvolume != null) { volume.put("volume", "true"); result.setVolumesearch(volume); break; } } } catch (Exception e) { e.printStackTrace(); } return result; }
From source file:de.geeksfactory.opacclient.apis.SISIS.java
@Override public ProlongAllResult prolongAll(Account account, int useraction, String selection) throws IOException { if (!initialised) { start();/*w w w. j ava2 s. c o m*/ } if (System.currentTimeMillis() - logged_in > SESSION_LIFETIME || logged_in_as == null) { try { account(account); } catch (JSONException e) { e.printStackTrace(); return new ProlongAllResult(MultiStepResult.Status.ERROR); } catch (OpacErrorException e) { return new ProlongAllResult(MultiStepResult.Status.ERROR, e.getMessage()); } } else if (logged_in_as.getId() != account.getId()) { try { account(account); } catch (JSONException e) { e.printStackTrace(); return new ProlongAllResult(MultiStepResult.Status.ERROR); } catch (OpacErrorException e) { return new ProlongAllResult(MultiStepResult.Status.ERROR, e.getMessage()); } } // We have to call the page we originally found the link on first... String html = httpGet(opac_url + "/userAccount.do?methodToCall=renewalPossible&renewal=account", ENCODING); Document doc = Jsoup.parse(html); if (doc.select("table.data").size() > 0) { List<Map<String, String>> result = new ArrayList<>(); for (Element td : doc.select("table.data tr td")) { Map<String, String> line = new HashMap<>(); if (!td.text().contains("Titel") || !td.text().contains("Status")) { continue; } String nextNodeIs = ""; for (Node n : td.childNodes()) { String text; if (n instanceof Element) { text = ((Element) n).text(); } else if (n instanceof TextNode) { text = ((TextNode) n).text(); } else { continue; } if (text.trim().length() == 0) { continue; } if (text.contains("Titel:")) { nextNodeIs = ProlongAllResult.KEY_LINE_TITLE; } else if (text.contains("Verfasser:")) { nextNodeIs = ProlongAllResult.KEY_LINE_AUTHOR; } else if (text.contains("Leihfristende:")) { nextNodeIs = ProlongAllResult.KEY_LINE_NEW_RETURNDATE; } else if (text.contains("Status:")) { nextNodeIs = ProlongAllResult.KEY_LINE_MESSAGE; } else if (text.contains("Mediennummer:") || text.contains("Signatur:")) { nextNodeIs = ""; } else 
if (nextNodeIs.length() > 0) { line.put(nextNodeIs, text.trim()); nextNodeIs = ""; } } result.add(line); } return new ProlongAllResult(MultiStepResult.Status.OK, result); } return new ProlongAllResult(MultiStepResult.Status.ERROR, stringProvider.getString(StringProvider.COULD_NOT_LOAD_ACCOUNT)); }
From source file:mml.handler.post.MMLPostHTMLHandler.java
/** * Parse a span with a class or not//from www .ja v a 2s . com * @param span the span in HTML */ private void parseSpan(Element span) throws JSONException { if (span.hasText()) { int offset = sb.length(); String name = span.attr("class"); Range r = new Range(name, offset, 0); if (name == null || name.length() == 0) name = "span"; if (isMilestone(name)) { pages.add(r); sb.append(span.text()); sb.append("\n"); pages.updateLen(r, sb.length() - offset); prevWasMilestone = true; } else if (name.equals("soft-hyphen")) { stil.add(r); // get previous word int i = sb.length() - 1; while (i > 0 && !Character.isWhitespace(sb.charAt(i))) i--; if (i > 0) i++; String prev = clean(sb.substring(i), true); // get next word String next = clean(nextWord(span), false); if (this.speller.isHardHyphen(prev, next)) r.name = "hard-hyphen"; sb.append(span.text()); stil.updateLen(r, sb.length() - offset); } else // span may contain other spans { stil.add(r); List<Node> children = span.childNodes(); for (Node child : children) { if (child instanceof Element) { String nName = child.nodeName().toLowerCase(); if (nName.equals("span")) parseSpan((Element) child); else parseOtherElement((Element) child); } else if (child instanceof TextNode) { TextNode tn = (TextNode) child; sb.append(tn.text()); } } if (isLineFormat(name)) ensure(1, false); stil.updateLen(r, sb.length() - offset); } } // else strangely no text: ignore it }
From source file:de.geeksfactory.opacclient.apis.SISIS.java
protected SearchRequestResult parse_search(String html, int page) throws OpacErrorException { Document doc = Jsoup.parse(html); doc.setBaseUri(opac_url + "/searchfoo"); if (doc.select(".error").size() > 0) { throw new OpacErrorException(doc.select(".error").text().trim()); } else if (doc.select(".nohits").size() > 0) { throw new OpacErrorException(doc.select(".nohits").text().trim()); } else if (doc.select(".box-header h2, #nohits").text().contains("keine Treffer")) { return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1); }//from ww w.j a va2 s .com int results_total = -1; String resultnumstr = doc.select(".box-header h2").first().text(); if (resultnumstr.contains("(1/1)") || resultnumstr.contains(" 1/1")) { reusehtml = html; throw new OpacErrorException("is_a_redirect"); } else if (resultnumstr.contains("(")) { results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1")); } else if (resultnumstr.contains(": ")) { results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1")); } Elements table = doc.select("table.data tbody tr"); identifier = null; Elements links = doc.select("table.data a"); boolean haslink = false; for (int i = 0; i < links.size(); i++) { Element node = links.get(i); if (node.hasAttr("href") & node.attr("href").contains("singleHit.do") && !haslink) { haslink = true; try { List<NameValuePair> anyurl = URLEncodedUtils .parse(new URI(node.attr("href").replace(" ", "%20").replace("&", "&")), ENCODING); for (NameValuePair nv : anyurl) { if (nv.getName().equals("identifier")) { identifier = nv.getValue(); break; } } } catch (Exception e) { e.printStackTrace(); } } } List<SearchResult> results = new ArrayList<>(); for (int i = 0; i < table.size(); i++) { Element tr = table.get(i); SearchResult sr = new SearchResult(); if (tr.select("td img[title]").size() > 0) { String title = tr.select("td img").get(0).attr("title"); String[] fparts = tr.select("td img").get(0).attr("src").split("/"); String 
fname = fparts[fparts.length - 1]; MediaType default_by_fname = defaulttypes.get(fname.toLowerCase(Locale.GERMAN).replace(".jpg", "") .replace(".gif", "").replace(".png", "")); MediaType default_by_title = defaulttypes.get(title); MediaType default_name = default_by_title != null ? default_by_title : default_by_fname; if (data.has("mediatypes")) { try { sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname))); } catch (JSONException | IllegalArgumentException e) { sr.setType(default_name); } } else { sr.setType(default_name); } } String alltext = tr.text(); if (alltext.contains("eAudio") || alltext.contains("eMusic")) { sr.setType(MediaType.MP3); } else if (alltext.contains("eVideo")) { sr.setType(MediaType.EVIDEO); } else if (alltext.contains("eBook")) { sr.setType(MediaType.EBOOK); } else if (alltext.contains("Munzinger")) { sr.setType(MediaType.EDOC); } if (tr.children().size() > 3 && tr.child(3).select("img[title*=cover]").size() == 1) { sr.setCover(tr.child(3).select("img[title*=cover]").attr("abs:src")); if (sr.getCover().contains("showCover.do")) { downloadCover(sr); } } Element middlething; if (tr.children().size() > 2 && tr.child(2).select("a").size() > 0) { middlething = tr.child(2); } else { middlething = tr.child(1); } List<Node> children = middlething.childNodes(); if (middlething.select("div").not("#hlrightblock,.bestellfunktionen").size() == 1) { Element indiv = middlething.select("div").not("#hlrightblock,.bestellfunktionen").first(); if (indiv.children().size() > 1) { children = indiv.childNodes(); } } else if (middlething.select("span.titleData").size() == 1) { children = middlething.select("span.titleData").first().childNodes(); } int childrennum = children.size(); List<String[]> strings = new ArrayList<>(); for (int ch = 0; ch < childrennum; ch++) { Node node = children.get(ch); if (node instanceof TextNode) { String text = ((TextNode) node).text().trim(); if (text.length() > 3) { strings.add(new String[] { "text", "", 
text }); } } else if (node instanceof Element) { List<Node> subchildren = node.childNodes(); for (int j = 0; j < subchildren.size(); j++) { Node subnode = subchildren.get(j); if (subnode instanceof TextNode) { String text = ((TextNode) subnode).text().trim(); if (text.length() > 3) { strings.add(new String[] { ((Element) node).tag().getName(), "text", text, ((Element) node).className(), node.attr("style") }); } } else if (subnode instanceof Element) { String text = ((Element) subnode).text().trim(); if (text.length() > 3) { strings.add(new String[] { ((Element) node).tag().getName(), ((Element) subnode).tag().getName(), text, ((Element) node).className(), node.attr("style") }); } } } } } StringBuilder description = null; if (tr.select("span.Z3988").size() == 1) { // Sometimes there is a <span class="Z3988"> item which provides // data in a standardized format. List<NameValuePair> z3988data; boolean hastitle = false; try { description = new StringBuilder(); z3988data = URLEncodedUtils .parse(new URI("http://dummy/?" 
+ tr.select("span.Z3988").attr("title")), "UTF-8"); for (NameValuePair nv : z3988data) { if (nv.getValue() != null) { if (!nv.getValue().trim().equals("")) { if (nv.getName().equals("rft.btitle") && !hastitle) { description.append("<b>").append(nv.getValue()).append("</b>"); hastitle = true; } else if (nv.getName().equals("rft.atitle") && !hastitle) { description.append("<b>").append(nv.getValue()).append("</b>"); hastitle = true; } else if (nv.getName().equals("rft.au")) { description.append("<br />").append(nv.getValue()); } else if (nv.getName().equals("rft.date")) { description.append("<br />").append(nv.getValue()); } } } } } catch (URISyntaxException e) { description = null; } } boolean described = false; if (description != null && description.length() > 0) { sr.setInnerhtml(description.toString()); described = true; } else { description = new StringBuilder(); } int k = 0; boolean yearfound = false; boolean titlefound = false; boolean sigfound = false; for (String[] part : strings) { if (!described) { if (part[0].equals("a") && (k == 0 || !titlefound)) { if (k != 0) { description.append("<br />"); } description.append("<b>").append(part[2]).append("</b>"); titlefound = true; } else if (part[2].matches("\\D*[0-9]{4}\\D*") && part[2].length() <= 10) { yearfound = true; if (k != 0) { description.append("<br />"); } description.append(part[2]); } else if (k == 1 && !yearfound && part[2].matches("^\\s*\\([0-9]{4}\\)$")) { if (k != 0) { description.append("<br />"); } description.append(part[2]); } else if (k == 1 && !yearfound && part[2].matches("^\\s*\\([0-9]{4}\\)$")) { if (k != 0) { description.append("<br />"); } description.append(part[2]); } else if (k > 1 && k < 4 && !sigfound && part[0].equals("text") && part[2].matches("^[A-Za-z0-9,\\- ]+$")) { description.append("<br />"); description.append(part[2]); } } if (part.length == 4) { if (part[0].equals("span") && part[3].equals("textgruen")) { sr.setStatus(SearchResult.Status.GREEN); } else if 
(part[0].equals("span") && part[3].equals("textrot")) { sr.setStatus(SearchResult.Status.RED); } } else if (part.length == 5) { if (part[4].contains("purple")) { sr.setStatus(SearchResult.Status.YELLOW); } } if (sr.getStatus() == null) { if ((part[2].contains("entliehen") && part[2].startsWith("Vormerkung ist leider nicht mglich")) || part[2].contains("nur in anderer Zweigstelle ausleihbar und nicht bestellbar")) { sr.setStatus(SearchResult.Status.RED); } else if (part[2].startsWith("entliehen") || part[2].contains("Ein Exemplar finden Sie in einer anderen Zweigstelle")) { sr.setStatus(SearchResult.Status.YELLOW); } else if ((part[2].startsWith("bestellbar") && !part[2].contains("nicht bestellbar")) || (part[2].startsWith("vorbestellbar") && !part[2].contains("nicht vorbestellbar")) || (part[2].startsWith("vorbestellbar") && !part[2].contains("nicht vorbestellbar")) || (part[2].startsWith("vormerkbar") && !part[2].contains("nicht vormerkbar")) || (part[2].contains("heute zurckgebucht")) || (part[2].contains("ausleihbar") && !part[2].contains("nicht ausleihbar"))) { sr.setStatus(SearchResult.Status.GREEN); } if (sr.getType() != null) { if (sr.getType().equals(MediaType.EBOOK) || sr.getType().equals(MediaType.EVIDEO) || sr.getType().equals(MediaType.MP3)) // Especially Onleihe.de ebooks are often marked // green though they are not available. { sr.setStatus(SearchResult.Status.UNKNOWN); } } } k++; } if (!described) { sr.setInnerhtml(description.toString()); } sr.setNr(10 * (page - 1) + i); sr.setId(null); results.add(sr); } resultcount = results.size(); return new SearchRequestResult(results, results_total, page); }
From source file:autoInsurance.BeiJPiccImpl.java
public String queryHebJg(String in) { // TODO Auto-generated method stub Map<String, Object> outMap = new HashMap<String, Object>(); outMap.put("success", false); outMap.put("hebYj", ""); JSONObject jsonObj = JSONObject.fromObject(in); String toubDh = jsonObj.getString("toubDh"); if (!toubDh.equals("")) { String postData = "comCode=11026871&riskCode=DAA&prpCproposalVo.checkFlag=&prpCproposalVo.underWriteFlag=&prpCproposalVo.strStartDate=&prpCproposalVo.othFlag=&prpCproposalVo.checkUpCode=&prpCproposalVo.operatorCode1=&prpCproposalVo.businessNature=&noNcheckFlag=0&jfcdURL=http://10.134.136.48:8100/cbc&prpallURL=http://10.134.136.48:8000/prpall&bizNoZ=&pageNo_=1&pageSize_=10&scmIsOpen=1111100000&searchConditionSwitch=0&queryinterval=04&prpCproposalVo.proposalNo=" + toubDh/*from www . j av a2s .c om*/ + "&prpCproposalVo.policyNo=&prpCproposalVo.licenseNo=&prpCproposalVo.vinNo=&prpCproposalVo.insuredCode=&prpCproposalVo.insuredName=&prpCproposalVo.contractNo=&prpCproposalVo.operateDate=&prpCproposalVo.operateDate2=&prpCproposalVo.startDate=&prpCproposalVo.startDate2=&prpCproposalVo.dmFlag=all&prpCproposalVo.underWriteFlagC=&prpCproposalVo.brandName=&prpCproposalVo.engineNo=&prpCproposalVo.frameNo=&prpCproposalVo.riskCode=DAA,DZA&prpCproposalVo.appliCode=&prpCproposalVo.apliName=&prpCproposalVo.makeCom=&makeComDes=&prpCproposalVo.operatorCode=&operatorCodeDes=&prpCproposalVo.comCode=&comCodeDes=&prpCproposalVo.handlerCode=&handlerCodeDes=&prpCproposalVo.handler1Code=&handler1CodeDes=&prpCproposalVo.endDate=&prpCproposalVo.endDate2=&prpCproposalVo.underWriteEndDate=&prpCproposalVo.underWriteEndDate2="; Map<String, String> map = null; try { map = parse2Map(postData); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } String strURL = "http://10.134.136.48:8000/prpall/business/selectProposal.do?pageSize=10&pageNo=1"; String respStr = httpClientUtil.doPost(strURL, map, "GBK"); System.out.println(": " + respStr); strURL = 
"http://10.134.136.48:8000/prpall/business/showUndwrtMsg.do?bizNo=" + toubDh + "&bizType=PROPOSAL"; respStr = httpClientUtil.doPost(strURL, new HashMap<String, String>(), "GBK"); // System.out.println(respStr); Document doc = Jsoup.parse(respStr); // System.out.println(doc.title()); Element element = doc.getElementById("bpmUwNotionX[0].handleText"); if (element != null) { String hebYj = element.childNodes().get(0).toString(); outMap.put("hebYj", hebYj); outMap.put("success", true); } } return JSONObject.fromObject(outMap).toString(); }
From source file:autoInsurance.BeiJPiccImpl.java
public String saveAndHeB(String in, Map<String, String> formMap) { // TODO Auto-generated method stub JSONObject jsonObject = JSONObject.fromObject(in); String hebXianZ = jsonObject.getString("hebXianZ"); // String saveHebId = jsonObject.getString("saveHebId"); JSONArray relation = jsonObject.getJSONArray("relation"); String carOwner_customMobile = ""; String carOwner_addressCName = ""; String carOwner_customerCode = ""; String carOwner_identifyNumber = ""; String carOwner_customerCName = ""; for (Object object : relation) { JSONObject jObj = (JSONObject) object; // if(jObj.getString("type").equals("cheZ")) { carOwner_customMobile = jObj.getString("customMobile"); carOwner_addressCName = jObj.getString("addressCName"); carOwner_customerCode = jObj.getString("customerCode"); carOwner_identifyNumber = jObj.getString("identifyNumber"); carOwner_customerCName = jObj.getString("customerCName"); continue; // } }//from w w w . j a v a 2s . c om formMap.put("insuredCarOwner", carOwner_customerCName); formMap.put("prpCinsureds[0].sex", "1");//0 formMap.put("prpCinsureds[0].age", "49");// formMap.put("prpCinsureds[0].insuredFlag", "11100000000000000000000000000A"); formMap.put("prpCinsureds[0].auditStatus", "2");// formMap.put("prpCinsureds[0].versionNo", "3");// formMap.put("prpCinsureds[0].identifyNumber", carOwner_identifyNumber);// formMap.put("prpCinsureds[0].insuredAddress", carOwner_addressCName);// formMap.put("prpCinsureds[0].insuredCode", carOwner_customerCode);// formMap.put("prpCinsureds[0].drivingLicenseNo", carOwner_identifyNumber);// formMap.put("prpCinsureds[0].mobile", carOwner_customMobile);// formMap.put("prpCcarShipTax.taxPayerCode", carOwner_customerCode);// formMap.put("prpCcarShipTax.taxPayerNumber", carOwner_identifyNumber);// formMap.put("prpCcarShipTax.taxPayerIdentNo", carOwner_identifyNumber);// String url = "http://10.134.136.48:8000/prpall/business/refreshPlanByTimes.do"; String respStr = httpClientUtil.doPost(url, formMap, "gbk"); 
System.out.println(":"); System.out.println(respStr); Map planMap = JackJson.fromJsonToObject(respStr, Map.class); List planData = (List) planMap.get("data"); for (int i = 0; i < planData.size(); i++) { Map plan = (Map) planData.get(i); formMap.put("cplans[" + i + "].backPlanFee", plan.get("planFee").toString()); formMap.put("cplans[" + i + "].planFee", plan.get("planFee").toString());//null formMap.put("cplan[" + i + "].payReasonC", plan.get("payReasonName").toString());//null formMap.put("prpCplanTemps[" + i + "].netPremium", plan.get("netPremium") == null ? "" : plan.get("netPremium").toString());//7275.58 formMap.put("prpCplanTemps[" + i + "].payReason", plan.get("payReason").toString());//null formMap.put("prpCplanTemps[" + i + "].taxPremium", plan.get("taxPremium") == null ? "" : plan.get("taxPremium").toString());//436.54 formMap.put("prpCplanTemps[" + i + "].planDate", timeStamp2Date(((Map) plan.get("planDate")).get("time").toString(), "yyyy-M-d"));//null formMap.put("prpCplanTemps[" + i + "].subsidyRate", plan.get("subsidyRate").toString());//null formMap.put("prpCplanTemps[" + i + "].payNo", plan.get("payNo").toString());//null formMap.put("prpCplanTemps[" + i + "].isBICI", plan.get("isBICI").toString());//null formMap.put("prpCplanTemps[" + i + "].planFee", plan.get("planFee").toString());//null formMap.put("prpCplanTemps[" + i + "].delinquentFee", plan.get("delinquentFee").toString());//null formMap.put("cplan_[" + i + "].payReasonC", plan.get("payReasonName").toString());//null } url = "http://10.134.136.48:8000/prpall/business/queryPayFor.do?agreementNo=&riskCode=DAA&comCode=11026871&chgCostRate=0"; respStr = httpClientUtil.doPost(url, formMap, "gbk"); System.out.println("querypayfor:"); System.out.println(respStr); Map payMap = JackJson.fromJsonToObject(respStr, Map.class); if (((List) payMap.get("data")).size() < 1) return "\"success\": false, \"msg\":\" queryPayFor \""; List prpDpayForPolicies = (List) ((Map) ((List) 
payMap.get("data")).get(0)).get("prpDpayForPolicies"); for (int i = 0; i < prpDpayForPolicies.size(); i++) { Map dpay = (Map) prpDpayForPolicies.get(i); formMap.put("prpCcommissionsTemp[" + i + "].agreementNo", ((Map) dpay.get("id")).get("agreementNo").toString());//null formMap.put("prpCcommissionsTemp[" + i + "].riskCode", dpay.get("riskCode").toString());//null formMap.put("prpCcommissionsTemp[" + i + "].auditRate", "");//null formMap.put("prpCcommissionsTemp[" + i + "].coinsRate", "100");//null formMap.put("prpCcommissionsTemp[" + i + "].adjustFlag", dpay.get("adjustFlag").toString());//null formMap.put("prpCcommissionsTemp[" + i + "].costRateUpper", dpay.get("costRateUpper").toString());//null formMap.put("prpCcommissionsTemp[" + i + "].configCode", ((Map) dpay.get("id")).get("configCode").toString());//null formMap.put("prpCcommissionsTemp[" + i + "].costType", dpay.get("costType").toString());//null formMap.put("prpCcommissionsTemp[" + i + "].costFee", "0");//null if (dpay.get("riskCode").toString().equals("DAA")) { formMap.put("prpCcommissionsTemp[" + i + "].sumPremium", formMap.get("prpCmain.sumPremium"));//null } else { formMap.put("prpCcommissionsTemp[" + i + "].sumPremium", formMap.get("prpCitemKindCI.premium"));//null } formMap.put("prpCcommissionsTemp[" + i + "].costRate", dpay.get("costRate").toString());//null } List prpDdismantleDetails = (List) ((Map) ((List) payMap.get("data")).get(0)).get("prpDdismantleDetails"); for (int i = 0; i < prpDdismantleDetails.size(); i++) { Map dismant = (Map) prpDdismantleDetails.get(i); formMap.put("prpDdismantleDetails[" + i + "].roleName", dismant.get("roleName").toString()); formMap.put("prpDdismantleDetails[" + i + "].id.agreementNo", ((Map) dismant.get("id")).get("agreementNo").toString()); formMap.put("prpDdismantleDetails[" + i + "].roleFlag", dismant.get("roleFlag").toString()); formMap.put("prpDdismantleDetails[" + i + "].id.configCode", ((Map) dismant.get("id")).get("configCode").toString()); 
formMap.put("prpDdismantleDetails[" + i + "].roleCode_uni", dismant.get("roleCode_uni").toString()); formMap.put("prpDdismantleDetails[" + i + "].id.roleCode", ((Map) dismant.get("id")).get("roleCode").toString()); formMap.put("prpDdismantleDetails[" + i + "].costRate", dismant.get("costRate").toString()); formMap.put("prpDdismantleDetails[" + i + "].businessNature", dismant.get("businessNature").toString()); formMap.put("prpDdismantleDetails[" + i + "].id.assignType", ((Map) dismant.get("id")).get("assignType").toString()); formMap.put("prpDdismantleDetails[" + i + "].flag", dismant.get("flag").toString()); } List prpCsaless = (List) ((Map) ((List) payMap.get("data")).get(0)).get("prpCsaless"); prpCsaless = prpCsaless == null ? new ArrayList() : prpCsaless; for (int j = 0; j < prpCsaless.size(); j++) { Map csale = (Map) prpCsaless.get(j); formMap.put("prpCsaless[" + j + "].totalRate", csale.get("totalRate").toString()); formMap.put("prpCsaless[" + j + "].riskCode", csale.get("riskCode").toString()); formMap.put("prpCsaless[" + j + "].id.salesCode", ((Map) csale.get("id")).get("salesCode").toString()); formMap.put("prpCsaless[" + j + "].oriSplitNumber", csale.get("oriSplitNumber").toString()); formMap.put("prpCsaless[" + j + "].id.salesDetailCode", ((Map) csale.get("id")).get("salesDetailCode").toString()); formMap.put("prpCsaless[" + j + "].totalRateMax", csale.get("totalRateMax").toString()); formMap.put("prpCsaless[" + j + "].splitWay", csale.get("splitWay").toString()); formMap.put("prpCsaless[" + j + "].splitFee", csale.get("splitFee").toString()); formMap.put("prpCsaless[" + j + "].id.proposalNo", ((Map) csale.get("id")).get("proposalNo").toString()); formMap.put("prpCsaless[" + j + "].salesName", csale.get("salesName").toString()); formMap.put("prpCsaless[" + j + "].salesDetailName", csale.get("salesDetailName").toString()); formMap.put("prpCsaless[" + j + "].splitRate", csale.get("splitRate").toString()); formMap.put("prpCsaless[" + j + "].flag", 
csale.get("flag").toString()); formMap.put("prpCsaless[" + j + "].agreementNo", csale.get("agreementNo").toString()); } Map<String, Object> outMap = new HashMap<String, Object>(); outMap.put("success", "false"); outMap.put("msg", ""); respStr = httpClientUtil.doPost("http://10.134.136.48:8000/prpall/business/insert4S.do", formMap, "GBK"); System.out.println(": " + respStr); String toubdH = respStr.split(",")[0]; String toubdH2 = ""; if (respStr.split(",").length > 1) toubdH2 = respStr.split(",")[1]; if (!respStr.contains("errorMessage")) { String respStr2 = httpClientUtil.doPost( "http://10.134.136.48:8000/prpall/business/editSubmitUndwrt.do?bizNo=" + toubdH, new HashMap<String, String>(), "GBK"); respStr2 = httpClientUtil.doPost( "http://10.134.136.48:8000/prpall/business/editSubmitUndwrt.do?bizNo=" + toubdH2, new HashMap<String, String>(), "GBK"); outMap.put("syxToubdh", toubdH); outMap.put("jqxToubdh", toubdH2); outMap.put("success", "true"); outMap.put("msg", ""); Document doc = null; try { String strURL = "http://10.134.136.48:8000/prpall/business/showUndwrtMsg.do?bizNo=" + toubdH + "&bizType=PROPOSAL"; if (!toubdH.equals("")) { respStr = httpClientUtil.doPost(strURL, new HashMap<String, String>(), "GBK"); System.out.println(respStr); doc = Jsoup.parse(respStr); ; System.out.println(", " + doc.title()); Element element = doc.getElementById("bpmUwNotionX[0].handleText"); if (element != null) { String syxHbYj = element.childNodes().get(0).toString(); outMap.put("syxHbYj", syxHbYj); } } strURL = "http://10.134.136.48:8000/prpall/business/showUndwrtMsg.do?bizNo=" + toubdH2 + "&bizType=PROPOSAL"; if (!toubdH2.equals("")) { respStr = httpClientUtil.doPost(strURL, new HashMap<String, String>(), "GBK"); System.out.println(respStr); doc = Jsoup.parse(respStr); System.out.println(", " + doc.title()); Element element = doc.getElementById("bpmUwNotionX[0].handleText"); if (element != null) { String jqxHbYj = element.childNodes().get(0).toString(); outMap.put("jqxHbYj", 
jqxHbYj); } } outMap.put("success", "true"); outMap.put("msg", ""); } catch (Exception e) { outMap.put("success", "false"); outMap.put("msg", e.getMessage()); } } else outMap.put("msg", respStr); // outMap.put("success", "true"); // outMap.put("msg", ""); // if(hebXianZ.equals("0")) { // outMap.put("syxToubdh", "TDDA201611020000717134"); // outMap.put("syxHbYj", " "); // } // // if(hebXianZ.equals("1")) { // outMap.put("jqxToubdh", "TDZA201611020000717134"); // outMap.put("jqxHbYj", " "); // } // // if(hebXianZ.equals("2")) { // outMap.put("syxToubdh", "TDDA201611020000717134"); // outMap.put("syxHbYj", " "); // outMap.put("jqxToubdh", "TDZA201611020000717134"); // outMap.put("jqxHbYj", " "); // } return JSONObject.fromObject(outMap).toString(); }