List of usage examples for org.jsoup.nodes.Element.select
public Elements select(String cssQuery)
From source file:mobi.jenkinsci.ci.client.JenkinsFormAuthHttpClient.java
public static HttpPost getPostForm(final String requestBaseUrl, final Element form, final HashMap<String, String> formMapping) throws MalformedURLException { final List<NameValuePair> formNvps = new ArrayList<NameValuePair>(); final String formAction = form.attr("action"); final HttpPost formPost = new HttpPost(getUrl(requestBaseUrl, formAction)); final Elements formFields = form.select("input"); for (final Element element : formFields) { final String fieldName = element.attr("name"); String fieldValue = element.attr("value"); final String fieldId = element.attr("id"); if (formMapping != null) { final String mappedValue = formMapping.get(fieldId); if (mappedValue != null) { fieldValue = mappedValue; }/*from w w w . j a v a2 s . c o m*/ } log.debug(String.format("Processing form field: name='%s' value='%s' id='%s'", fieldName, fieldValue, fieldId)); formNvps.add(new BasicNameValuePair(fieldName, fieldValue)); } try { formPost.setEntity(new UrlEncodedFormEntity(formNvps, "UTF-8")); } catch (final UnsupportedEncodingException e) { // This would never happen throw new IllegalArgumentException("UTF-8 not recognised"); } return formPost; }
From source file:com.astamuse.asta4d.render.RenderUtil.java
public final static void applyMessages(Element target) { Context context = Context.getCurrentThreadContext(); List<Element> msgElems = target.select(ExtNodeConstants.MSG_NODE_TAG_SELECTOR); for (final Element msgElem : msgElems) { Attributes attributes = msgElem.attributes(); String key = attributes.get(ExtNodeConstants.MSG_NODE_ATTR_KEY); // List<String> externalizeParamKeys = getExternalizeParamKeys(attributes); Object defaultMsg = new Object() { @Override// w w w .jav a 2 s.co m public String toString() { return ExtNodeConstants.MSG_NODE_ATTRVALUE_HTML_PREFIX + msgElem.html(); } }; Locale locale = LocalizeUtil.getLocale(attributes.get(ExtNodeConstants.MSG_NODE_ATTR_LOCALE)); String currentTemplatePath = attributes.get(ExtNodeConstants.ATTR_TEMPLATE_PATH); if (StringUtils.isEmpty(currentTemplatePath)) { logger.warn("There is a msg tag which does not hold corresponding template file path:{}", msgElem.outerHtml()); } else { context.setData(TRACE_VAR_TEMPLATE_PATH, currentTemplatePath); } final Map<String, Object> paramMap = getMessageParams(attributes, locale, key); String text; switch (I18nMessageHelperTypeAssistant.configuredHelperType()) { case Mapped: text = I18nMessageHelperTypeAssistant.getConfiguredMappedHelper().getMessageWithDefault(locale, key, defaultMsg, paramMap); break; case Ordered: default: // convert map to array List<Object> numberedParamNameList = new ArrayList<>(); for (int index = 0; paramMap .containsKey(ExtNodeConstants.MSG_NODE_ATTR_PARAM_PREFIX + index); index++) { numberedParamNameList.add(paramMap.get(ExtNodeConstants.MSG_NODE_ATTR_PARAM_PREFIX + index)); } text = I18nMessageHelperTypeAssistant.getConfiguredOrderedHelper().getMessageWithDefault(locale, key, defaultMsg, numberedParamNameList.toArray()); } Node node; if (text.startsWith(ExtNodeConstants.MSG_NODE_ATTRVALUE_TEXT_PREFIX)) { node = ElementUtil.text(text.substring(ExtNodeConstants.MSG_NODE_ATTRVALUE_TEXT_PREFIX.length())); } else if 
(text.startsWith(ExtNodeConstants.MSG_NODE_ATTRVALUE_HTML_PREFIX)) { node = ElementUtil .parseAsSingle(text.substring(ExtNodeConstants.MSG_NODE_ATTRVALUE_HTML_PREFIX.length())); } else { node = ElementUtil.text(text); } msgElem.replaceWith(node); context.setData(TRACE_VAR_TEMPLATE_PATH, null); } }
From source file:io.andyc.papercut.api.PrintApi.java
/** * Get the different printers that we can print to and return an array of * the different printer types/*w w w . j a v a 2 s. c o m*/ * * @return {PrinterOption[]} - An array of print options */ public static ArrayList<PrinterOption> getPrinterOptions(SessionFactory.Session session) throws IOException, ExpiredSessionException, PrintingException { Elements inputValues = PrintApi.buildConnection(session, "?service=action/1/UserWebPrint/0/%24ActionLink") .execute().parse().select("form").select("div.wizard-body").select("table.results").select("label"); ArrayList<PrinterOption> result = new ArrayList<>(); for (Element element : inputValues) { String name = element.select("input").attr("name"); String value = element.select("input").attr("value"); if (name.isEmpty() || value.isEmpty()) { throw new PrintingException("Cannot parse name and/or value of printing options"); } result.add(new PrinterOption(name, value, element.text())); } if (result.size() == 0) { throw new PrintingException("Cannot parse printer options"); } return result; }
From source file:com.kantenkugel.discordbot.jdocparser.JDocParser.java
/**
 * Collects the methods listed as inherited in a Javadoc method summary.
 *
 * <p>Starting from the parent of {@code summaryAnchor}, every anchor whose {@code name} starts
 * with {@code methods.inherited.from.class} is expected to have exactly two sibling elements:
 * an {@code h3} heading followed by a {@code code} element. The last child of the heading, if it
 * is a link, names the class the methods come from; each link inside the {@code code} element
 * names one inherited method. Anchors whose heading does not end in a link are skipped.
 *
 * @param summaryAnchor anchor inside the method summary, or {@code null}
 * @return map from lower-cased method name to lower-cased declaring class name; the first class
 *         seen for a method name wins; empty when {@code summaryAnchor} is {@code null}
 * @throws RuntimeException if the markup around an anchor does not have the expected layout
 */
private static Map<String, String> getInheritedMethods(Element summaryAnchor) {
    Map<String, String> inherited = new HashMap<>();
    if (summaryAnchor == null)
        return inherited;

    Element container = summaryAnchor.parent();
    Elements inheritAnchors = container.select("a[name^=\"methods.inherited.from.class\"]");
    for (Element inheritAnchor : inheritAnchors) {
        String unexpectedHtml = "Got unexpected html while parsing inherited methods from class "
                + inheritAnchor.attr("name");

        if (inheritAnchor.siblingElements().size() != 2)
            throw new RuntimeException(unexpectedHtml);

        // Two siblings do not guarantee that both of them follow the anchor, so the next
        // sibling may be missing; report that like any other layout mismatch.
        Element heading = inheritAnchor.nextElementSibling();
        if (heading == null || !heading.tagName().equals("h3"))
            throw new RuntimeException(unexpectedHtml);

        Element classLink = heading.children().last();
        if (classLink == null || !classLink.tagName().equals("a"))
            continue;
        String parent = classLink.text().toLowerCase();

        Element methodList = heading.nextElementSibling();
        if (methodList == null || !methodList.tagName().equals("code"))
            throw new RuntimeException(unexpectedHtml);

        for (Element sub = methodList.children().first(); sub != null; sub = sub.nextElementSibling()) {
            if (sub.tagName().equals("a")) {
                inherited.putIfAbsent(sub.text().toLowerCase(), parent);
            }
        }
    }
    return inherited;
}
From source file:io.jari.geenstijl.API.API.java
private static Artikel parseArtikel(Element artikel_el, Context context) throws ParseException { Artikel artikel = new Artikel(); //id//from ww w.j a va2s. c om artikel.id = Integer.parseInt(artikel_el.attr("id").substring(1)); //summary artikel.summary = artikel_el.select("a.more").first() != null; //titel artikel.titel = artikel_el.select("h1").text(); //plaatje if (PreferenceManager.getDefaultSharedPreferences(context).getBoolean("show_images", true)) { Element plaatje = artikel_el.select("img").first(); if (plaatje != null) { try { String url = plaatje.attr("src"); Log.d(TAG, "Downloading " + url); // artikel.plaatje = Drawable.createFromStream(((java.io.InputStream)new URL(plaatje.attr("src")).getContent()), null); artikel.plaatje = readBytes((InputStream) new URL(plaatje.attr("src")).getContent()); artikel.groot_plaatje = plaatje.hasClass("groot"); if (plaatje.hasAttr("width") && plaatje.hasAttr("height")) if (!plaatje.attr("width").equals("100") || !plaatje.attr("height").equals("100")) artikel.groot_plaatje = true; if (artikel.groot_plaatje) Log.i(TAG, " Done. Big image."); else Log.i(TAG, " Done."); } catch (Exception ex) { Log.w(TAG, "Unable to download image, Falling back... 
Reason: " + ex.getMessage()); artikel.plaatje = null; } } } //embed if (artikel_el.select("div.embed").first() != null) { //atm alleen support voor iframes Element frame = artikel_el.select("div.embed>iframe").first(); if (frame != null) artikel.embed = frame.attr("src"); } //embed (geenstijl.tv) if (!domain.equals("www.geenstijl.nl")) { //extract url from script Element scriptEl = artikel_el.select("script").first(); if (scriptEl != null) { String script = scriptEl.html(); Pattern pattern = Pattern.compile("'(.*)', fall"); Matcher matcher = pattern.matcher(script); if (matcher.find() && matcher.groupCount() == 1) { artikel.embed = matcher.group(1); } } } //footer shit Element footer = artikel_el.select("footer").first(); SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm", Locale.US); artikel.datum = simpleDateFormat.parse(footer.select("time").first().attr("datetime")); StringTokenizer footer_items = new StringTokenizer(footer.text(), "|"); artikel.auteur = footer_items.nextToken().trim(); artikel.reacties = Integer.parseInt(footer.select("a.comments").text().replace(" reacties", "")); artikel.link = footer.select("a").first().attr("href"); //clean up artikel_el.select("h1").remove(); artikel_el.select(".embed").remove(); artikel_el.select("img").remove(); artikel_el.select("footer").remove(); artikel_el.select("a.more").remove(); artikel_el.select("script").remove(); //inhoud artikel.inhoud = artikel_el.html(); return artikel; }
From source file:me.vertretungsplan.parser.UntisInfoParser.java
/**
 * Stores the content of one timetable cell in the given substitution.
 *
 * <p>A leading {@code *} is dropped from the cell text, and the placeholders {@code ---} and
 * {@code ---.} are turned into {@code null}. When the text inside the cell's {@code strike}
 * elements equals the whole cell text, the value is stored as the <em>previous</em>
 * subject/teacher/room, otherwise as the current one. Types other than {@code subject},
 * {@code teacher} and {@code room} are ignored.
 *
 * @param s    substitution that receives the value
 * @param type kind of the cell: {@code "subject"}, {@code "teacher"} or {@code "room"}
 * @param td   the table cell
 */
private static void parseTimetableCellContent(Substitution s, String type, Element td) {
    final String cellText = td.text();

    String value = cellText.startsWith("*") ? cellText.substring(1) : cellText;
    if (value.equals("---.") || value.equals("---")) {
        value = null;
    }

    final boolean striked = td.select("strike").text().equals(cellText);

    if (type.equals("subject")) {
        if (striked) {
            s.setPreviousSubject(value);
        } else {
            s.setSubject(value);
        }
        return;
    }
    if (type.equals("teacher")) {
        if (striked) {
            s.setPreviousTeacher(value);
        } else {
            s.setTeacher(value);
        }
        return;
    }
    if (type.equals("room")) {
        if (striked) {
            s.setPreviousRoom(value);
        } else {
            s.setRoom(value);
        }
    }
}
From source file:dsll.pinterest.crawler.Reduce.java
private static Text getPinContent(String url, DBCollection pinsCollection) throws JSONException { Document html = null;//from w w w. j a v a 2 s . c o m JSONObject pin = new JSONObject(); try { html = Jsoup.connect(url).get(); } catch (Exception e) { return new Text("HTTP connection failed..."); } // Gather major pins data Element doc = html.select("body").first(); // Pin ID String id = (url.split("pin/")[1].split("/")[0]); pin.append("ID", id); // Pin image String imageURL = ""; Element tmp = doc.select("div[class=pinImageSourceWrapper]").first(); try { tmp = tmp.select("div[class=imageContainer]").select("img").first(); imageURL = tmp.attr("src"); } catch (Exception e) { } // try{ // ByteArrayOutputStream pimg=new ByteArrayOutputStream(), cimg = new ByteArrayOutputStream(); // for(int i=0; i<3; i++){ // BufferedImage img=dummyImage; // try{ // img = ImageIO.read(new URL(imageURL)); // // }catch(Exception e){} // ImageIO.write(img, "jpg", cimg); // if(pimg.size()<cimg.size()){ // pimg = cimg; // } // } // // save to hdfs // Configuration conf = new Configuration(); // FileSystem fs = FileSystem.get(conf); // Path outFile = new Path("/home/hadoop/"+id+".png"); // FSDataOutputStream out = fs.create(outFile); // out.write(pimg.toByteArray()); // // }catch(Exception e){ // e.printStackTrace(); // } pin.append("image", imageURL); //Pin name tmp = doc.select("h2[itemprop=name]").first(); String name = ""; if (tmp != null) { name = tmp.text().trim(); } pin.append("name", name); // Pin source Element sourceCont = doc.select("div[class=sourceFlagWrapper]").first(); JSONObject source = new JSONObject(); if (sourceCont != null) { String title = sourceCont.text().trim(); String src = sourceCont.select("a").first().attr("href"); source.append("title", title); source.append("src", src); } pin.append("source", source); //pin credit JSONObject pinCredit = new JSONObject(); Element credit = doc.select("div[class=pinCredits]").first(); String creditName = "", creditTitle = "", 
creditSource = ""; try { creditName = credit.select("div[class=creditName]").text().trim(); } catch (Exception e) { } try { creditTitle = credit.select("div[class=creditTitle]").text().trim(); } catch (Exception e) { } try { creditSource = credit.select("a").attr("href"); } catch (Exception e) { } pinCredit.append("name", creditName); pinCredit.append("title", creditTitle); pinCredit.append("src", creditSource); pin.append("credit", pinCredit); //comments JSONArray comments = new JSONArray(); Elements commentsConts = doc.select("div[class=commenterNameCommentText]"); for (Element commentCont : commentsConts) { JSONObject comment = new JSONObject(); Element creatorEle = commentCont.select("div[class=commenterWrapper] a").first(); String creatorName = creatorEle.text().trim(); String creatorSrc = creatorEle.attr("href"); String content = "", raw = ""; Element commentContent = commentCont.select(".commentDescriptionContent").first(); try { content = commentContent.text().trim(); raw = commentContent.html(); comment.append("creator", creatorName); comment.append("creator_url", creatorSrc); comment.append("content", content); comment.append("content_raw", raw); comments.put(comment); } catch (Exception e) { } } pin.append("comments", comments); //pin board link and related pins Element bottomDoc = doc.select("div[class=Module CloseupSidebar]").first(); //pin board JSONArray board = new JSONArray(); if (bottomDoc != null) { Element boardEle = bottomDoc.select("div[class=boardHeader]").first(); JSONObject b = new JSONObject(); String boardName = ""; try { boardName = boardEle.select("h3[class=title]").text().trim(); } catch (Exception ee) { } String boardSrc = ""; try { boardSrc = "https://www.pinterest.com" + boardEle.select("a").attr("href").trim(); } catch (Exception ee) { } b.append("name", boardName); b.append("src", boardSrc); board.put(b); } pin.append("board", board); //CAUTION: what if a pin shows up in different boards? 
//related pins bottomDoc = doc .select("div[class=closeupBottom] div[class=Module CloseupBottom] div[class=relatedPinsWrapper]") .first(); JSONArray relatedPins = new JSONArray(); if (bottomDoc != null) { Elements relatedPinsConts = bottomDoc.select("div[class=pinWrapper]"); for (Element relatedPinsCont : relatedPinsConts) { JSONObject relatedPin = new JSONObject(); try { relatedPin.append("src", "https://www.pinterest.com" + relatedPinsCont.select("div[class=pinHolder] > a").attr("href")); } catch (Exception e) { } relatedPins.put(relatedPin); } } pin.append("related_pins", relatedPins); // Optional: push data to database BasicDBObject dbObject = (BasicDBObject) JSON.parse(pin.toString()); pinsCollection.insert(dbObject); return new Text(pin.toString()); }
From source file:me.vertretungsplan.parser.DaVinciParser.java
static void parseDaVinciTable(Element table, SubstitutionSchedule v, String klasse, SubstitutionScheduleDay day, ColorProvider colorProvider) {//from ww w . j ava2 s .c o m List<String> headers = new ArrayList<>(); for (Element header : table.select("thead tr th, tr td[bgcolor=#9999FF]")) { headers.add(header.text()); } // These three variables can Set<String> classes = new HashSet<>(); String lesson = null; LocalDate currentDate = null; Pattern previousCurrentPattern = Pattern.compile("\\+([^\\s]+) \\(([^)]+)\\)"); Pattern previousPattern = Pattern.compile("\\(([^)]+)\\)"); for (Element row : table.select("tr:not(thead tr, tr:has(td[bgcolor=#9999FF]))")) { Substitution subst = new Substitution(); LocalDate substDate = null; Elements columns = row.select("td"); for (int i = 0; i < headers.size(); i++) { String value = columns.get(i).text().replace("\u00a0", ""); String header = headers.get(i); if (value.isEmpty()) { if (header.equals("Klasse")) subst.setClasses(classes); if (header.equals("Pos") || header.equals("Stunde") || header.equals("Std.")) { subst.setLesson(lesson); } if (header.equals("Art") || header.equals("Merkmal")) subst.setType("Vertretung"); if (header.equals("Datum")) substDate = currentDate; continue; } Matcher previousCurrentMatcher = previousCurrentPattern.matcher(value); Matcher previousMatcher = previousPattern.matcher(value); switch (header) { case "Klasse": String classesStr = value; if (previousMatcher.find()) { classesStr = previousMatcher.group(1); } classes = new HashSet<>(Arrays.asList(classesStr.split(", "))); subst.setClasses(classes); break; case "Pos": case "Stunde": case "Std.": lesson = value; subst.setLesson(lesson); break; case "VLehrer Krzel": case "VLehrer": case "Vertreter": case "Vertretungslehrkraft": if (!value.startsWith("*")) { subst.setTeacher(value); } else { subst.setType(value.substring(1)); } break; case "Lehrer": case "Lehrer Krzel": case "Lehrer Name": case "Lehrkraft": if (previousCurrentMatcher.find()) { 
subst.setTeacher(previousCurrentMatcher.group(1)); subst.setPreviousTeacher(previousCurrentMatcher.group(2)); } else if (previousMatcher.find()) { subst.setPreviousTeacher(previousMatcher.group(1)); } else { subst.setPreviousTeacher(value); } break; case "VFach": case "V Fach": subst.setSubject(value); break; case "Fach": case "Original Fach": if (previousCurrentMatcher.find()) { subst.setSubject(previousCurrentMatcher.group(1)); subst.setPreviousSubject(previousCurrentMatcher.group(2)); } else { subst.setPreviousSubject(value); } break; case "VRaum": case "V Raum": subst.setRoom(value); break; case "Raum": case "Original Raum": if (previousCurrentMatcher.find()) { subst.setRoom(previousCurrentMatcher.group(1)); subst.setPreviousRoom(previousCurrentMatcher.group(2)); } else { subst.setPreviousRoom(value); } break; case "Art": case "Merkmal": subst.setType(value); break; case "Info": case "Mitteilung": subst.setDesc(value); break; case "Datum": substDate = ParserUtils.parseDate(value); currentDate = substDate; break; } } if (klasse != null) { Set<String> fixedClasses = new HashSet<>(); fixedClasses.add(klasse); subst.setClasses(fixedClasses); } if (subst.getType() == null) { String recognizedType = null; if (subst.getDesc() != null) recognizedType = recognizeType(subst.getDesc()); subst.setType(recognizedType != null ? recognizedType : "Vertretung"); } subst.setColor(colorProvider.getColor(subst.getType())); if (substDate == null && day == null) continue; if (day == null || substDate != null && !substDate.equals(day.getDate())) { day = null; for (SubstitutionScheduleDay d : v.getDays()) { if (d.getDate().equals(substDate)) { day = d; } } if (day == null) { day = new SubstitutionScheduleDay(); day.setDate(substDate); v.addDay(day); } } day.addSubstitution(subst); } }
From source file:me.vertretungsplan.parser.UntisCommonParser.java
/**
 * Extracts the "last change" information from a mon_head table.
 *
 * <p>Looks at the first right-aligned cell ({@code td[align=right]}). A timestamp of the form
 * {@code dd.MM.yyyy HH:mm} found in that cell is returned as is. Otherwise, when the table
 * text contains {@code "Stand: "}, everything after {@code "Stand:"} is returned, trimmed.
 *
 * @param monHead the mon_head table element
 * @return the last-change text, or {@code null} when there is no right-aligned cell or neither
 *         form is present
 */
private static String findLastChangeFromMonHeadTable(Element monHead) {
    Element rightCell = monHead.select("td[align=right]").first();
    if (rightCell == null) {
        return null;
    }

    Matcher timestamp = Pattern.compile("\\d\\d\\.\\d\\d\\.\\d\\d\\d\\d \\d\\d:\\d\\d")
            .matcher(rightCell.text());
    if (timestamp.find()) {
        return timestamp.group();
    }

    String headText = monHead.text();
    if (!headText.contains("Stand: ")) {
        return null;
    }
    int start = headText.indexOf("Stand:") + "Stand:".length();
    return headText.substring(start).trim();
}
From source file:com.itcs.commons.email.EmailAutoconfigClient.java
private static void extractIncommingServerSettings(Document doc, Map<String, String> settings, String type) { for (Element element : doc.select("incomingServer")) { // System.out.println("element.attr(\"type\"):"+element.attr("type")); if (element.attr("type").equals(type)) { // System.out.println("element.select(\"hostname\"):" + element.select("hostname").text()); settings.put(EnumEmailSettingKeys.INBOUND_SERVER.getKey(), element.select("hostname").text()); // System.out.println("element.select(\"port\"):" + element.select("port").text()); settings.put(EnumEmailSettingKeys.INBOUND_PORT.getKey(), element.select("port").text()); // System.out.println("element.select(\"socketType\"):" + element.select("socketType").text()); settings.put(EnumEmailSettingKeys.INBOUND_SSL_ENABLED.getKey(), element.select("socketType").text().trim().equals("SSL") ? "true" : "false"); }/* w w w .j a va 2s .c om*/ } }