Example usage for org.jsoup.nodes Element select

List of usage examples for org.jsoup.nodes Element select

Introduction

This page collects example usages of org.jsoup.nodes.Element.select.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:mobi.jenkinsci.ci.client.JenkinsFormAuthHttpClient.java

/**
 * Builds the POST request that submits the given HTML form.
 *
 * <p>Every {@code input} element found below {@code form} contributes one name/value pair. The
 * value is taken from the element's {@code value} attribute unless {@code formMapping} holds a
 * non-null entry for the element's {@code id}, in which case the mapped value is sent instead.
 *
 * @param requestBaseUrl base URL passed to {@code getUrl} together with the form's
 *        {@code action} attribute to obtain the target of the request
 * @param form the form element whose inputs are submitted
 * @param formMapping replacement values keyed by input {@code id}; may be {@code null}
 * @return the POST request carrying the UTF-8 URL-encoded form fields
 * @throws MalformedURLException presumably raised by {@code getUrl} when the target URL is invalid
 * @throws IllegalArgumentException if the UTF-8 encoding is not supported (the original
 *         {@link UnsupportedEncodingException} is attached as the cause)
 */
public static HttpPost getPostForm(final String requestBaseUrl, final Element form,
        final HashMap<String, String> formMapping) throws MalformedURLException {
    final List<NameValuePair> formNvps = new ArrayList<NameValuePair>();
    final String formAction = form.attr("action");
    final HttpPost formPost = new HttpPost(getUrl(requestBaseUrl, formAction));
    final Elements formFields = form.select("input");
    for (final Element element : formFields) {
        final String fieldName = element.attr("name");
        String fieldValue = element.attr("value");
        final String fieldId = element.attr("id");

        if (formMapping != null) {
            final String mappedValue = formMapping.get(fieldId);
            if (mappedValue != null) {
                fieldValue = mappedValue;
            }
        }

        // Credentials typed into password inputs must never reach the log in clear text.
        final String loggedValue = "password".equalsIgnoreCase(element.attr("type")) ? "********" : fieldValue;
        log.debug(String.format("Processing form field: name='%s' value='%s' id='%s'", fieldName, loggedValue,
                fieldId));
        formNvps.add(new BasicNameValuePair(fieldName, fieldValue));
    }
    try {
        formPost.setEntity(new UrlEncodedFormEntity(formNvps, "UTF-8"));
    } catch (final UnsupportedEncodingException e) {
        // UTF-8 is expected to be available everywhere; keep the cause in case it is not.
        throw new IllegalArgumentException("UTF-8 not recognised", e);
    }

    return formPost;
}

From source file:com.astamuse.asta4d.render.RenderUtil.java

/**
 * Replaces every message node below {@code target} with its resolved message.
 *
 * <p>For each element matched by {@code ExtNodeConstants.MSG_NODE_TAG_SELECTOR} the message is
 * looked up through the configured i18n helper (mapped or ordered parameters). The node's own
 * inner HTML serves as the default message. The resolved text is turned into a text node or a
 * parsed HTML node depending on its prefix and swapped in for the message node.
 *
 * @param target the element whose message nodes are rendered in place
 */
public final static void applyMessages(Element target) {
    Context context = Context.getCurrentThreadContext();
    List<Element> messageNodes = target.select(ExtNodeConstants.MSG_NODE_TAG_SELECTOR);
    for (final Element messageNode : messageNodes) {
        Attributes attrs = messageNode.attributes();
        String messageKey = attrs.get(ExtNodeConstants.MSG_NODE_ATTR_KEY);

        // Default message: the node's inner HTML, rendered only when toString() is called.
        Object fallbackMessage = new Object() {
            @Override
            public String toString() {
                return ExtNodeConstants.MSG_NODE_ATTRVALUE_HTML_PREFIX + messageNode.html();
            }
        };
        Locale locale = LocalizeUtil.getLocale(attrs.get(ExtNodeConstants.MSG_NODE_ATTR_LOCALE));

        // Expose the template path to the context while this message is resolved.
        String templatePath = attrs.get(ExtNodeConstants.ATTR_TEMPLATE_PATH);
        if (!StringUtils.isEmpty(templatePath)) {
            context.setData(TRACE_VAR_TEMPLATE_PATH, templatePath);
        } else {
            logger.warn("There is a msg tag which does not hold corresponding template file path:{}",
                    messageNode.outerHtml());
        }

        final Map<String, Object> params = getMessageParams(attrs, locale, messageKey);
        String text;
        switch (I18nMessageHelperTypeAssistant.configuredHelperType()) {
        case Mapped:
            text = I18nMessageHelperTypeAssistant.getConfiguredMappedHelper().getMessageWithDefault(locale,
                    messageKey, fallbackMessage, params);
            break;
        case Ordered:
        default:
            // Collect the consecutively numbered parameters (prefix + 0, prefix + 1, ...) in order.
            List<Object> orderedParams = new ArrayList<>();
            int position = 0;
            while (params.containsKey(ExtNodeConstants.MSG_NODE_ATTR_PARAM_PREFIX + position)) {
                orderedParams.add(params.get(ExtNodeConstants.MSG_NODE_ATTR_PARAM_PREFIX + position));
                position++;
            }
            text = I18nMessageHelperTypeAssistant.getConfiguredOrderedHelper().getMessageWithDefault(locale,
                    messageKey, fallbackMessage, orderedParams.toArray());
        }

        // The prefix decides whether the result is plain text or HTML; no prefix means plain text.
        Node replacement;
        if (text.startsWith(ExtNodeConstants.MSG_NODE_ATTRVALUE_TEXT_PREFIX)) {
            String plain = text.substring(ExtNodeConstants.MSG_NODE_ATTRVALUE_TEXT_PREFIX.length());
            replacement = ElementUtil.text(plain);
        } else if (text.startsWith(ExtNodeConstants.MSG_NODE_ATTRVALUE_HTML_PREFIX)) {
            String markup = text.substring(ExtNodeConstants.MSG_NODE_ATTRVALUE_HTML_PREFIX.length());
            replacement = ElementUtil.parseAsSingle(markup);
        } else {
            replacement = ElementUtil.text(text);
        }
        messageNode.replaceWith(replacement);

        context.setData(TRACE_VAR_TEMPLATE_PATH, null);
    }
}

From source file:io.andyc.papercut.api.PrintApi.java

/**
 * Get the different printers that we can print to and return an array of
 * the different printer types/*w  w  w  . j  a v a  2  s.  c o m*/
 *
 * @return {PrinterOption[]} - An array of print options
 */
public static ArrayList<PrinterOption> getPrinterOptions(SessionFactory.Session session)
        throws IOException, ExpiredSessionException, PrintingException {
    Elements inputValues = PrintApi.buildConnection(session, "?service=action/1/UserWebPrint/0/%24ActionLink")
            .execute().parse().select("form").select("div.wizard-body").select("table.results").select("label");

    ArrayList<PrinterOption> result = new ArrayList<>();
    for (Element element : inputValues) {
        String name = element.select("input").attr("name");
        String value = element.select("input").attr("value");
        if (name.isEmpty() || value.isEmpty()) {
            throw new PrintingException("Cannot parse name and/or value of printing options");
        }
        result.add(new PrinterOption(name, value, element.text()));

    }

    if (result.size() == 0) {
        throw new PrintingException("Cannot parse printer options");
    }
    return result;
}

From source file:com.kantenkugel.discordbot.jdocparser.JDocParser.java

/**
 * Collects the methods a class inherits, as listed in the method summary of a Javadoc page.
 *
 * <p>Starting from the parent of {@code summaryAnchor}, every anchor whose {@code name} starts
 * with {@code methods.inherited.from.class} must be followed by an {@code h3} heading and then
 * a {@code code} element. The text of the heading's last child link names the class the methods
 * come from; each link inside the {@code code} element names one inherited method. Headings
 * whose last child is not a link are skipped.
 *
 * @param summaryAnchor anchor of the method summary; may be {@code null}
 * @return map from lower-cased method name to lower-cased declaring class name; the first class
 *         seen for a method name wins; empty when {@code summaryAnchor} is {@code null}
 * @throws RuntimeException if the HTML around an inherited-methods anchor does not have the
 *         expected anchor / h3 / code structure
 */
private static Map<String, String> getInheritedMethods(Element summaryAnchor) {
    Map<String, String> inherited = new HashMap<>();
    if (summaryAnchor == null) {
        return inherited;
    }
    summaryAnchor = summaryAnchor.parent();
    Elements inheritAnchors = summaryAnchor.select("a[name^=\"methods.inherited.from.class\"]");
    for (Element inheritAnchor : inheritAnchors) {
        String unexpectedHtml = "Got unexpected html while parsing inherited methods from class "
                + inheritAnchor.attr("name");
        if (inheritAnchor.siblingElements().size() != 2) {
            throw new RuntimeException(unexpectedHtml);
        }
        // nextElementSibling() yields null when the anchor is the last element: report that as
        // malformed HTML instead of failing with a bare NullPointerException.
        Element heading = inheritAnchor.nextElementSibling();
        if (heading == null || !heading.tagName().equals("h3")) {
            throw new RuntimeException(unexpectedHtml);
        }
        Element sub = heading.children().last();
        if (sub == null || !sub.tagName().equals("a")) {
            continue;
        }
        String parent = sub.text().toLowerCase();
        Element methodList = heading.nextElementSibling();
        if (methodList == null || !methodList.tagName().equals("code")) {
            throw new RuntimeException(unexpectedHtml);
        }
        for (sub = methodList.children().first(); sub != null; sub = sub.nextElementSibling()) {
            if (sub.tagName().equals("a")) {
                inherited.putIfAbsent(sub.text().toLowerCase(), parent);
            }
        }
    }
    return inherited;
}

From source file:io.jari.geenstijl.API.API.java

/**
 * Converts one article element into an {@link Artikel}.
 *
 * <p>Reads id, summary flag, title, optional image, optional embed URL and the footer metadata
 * (date, author, comment count, link). Afterwards the title, embed, image, footer, "more" link
 * and script elements are removed from {@code artikel_el} and the remaining HTML becomes the
 * article content. Note that {@code artikel_el} is modified by this call.
 *
 * <p>The element must contain a {@code footer} with a {@code time} element and at least one
 * link; otherwise the footer handling fails with a {@link NullPointerException}.
 *
 * @param artikel_el the article element to parse; stripped of metadata elements on return
 * @param context used to read the {@code show_images} preference
 * @return the populated article
 * @throws ParseException if the footer's {@code datetime} attribute is not in
 *         {@code yyyy-MM-dd'T'HH:mm} format
 */
private static Artikel parseArtikel(Element artikel_el, Context context) throws ParseException {
    Artikel artikel = new Artikel();

    // id: the element id without its first character
    artikel.id = Integer.parseInt(artikel_el.attr("id").substring(1));

    // summary: true when the article contains an a.more link
    artikel.summary = artikel_el.select("a.more").first() != null;

    // title
    artikel.titel = artikel_el.select("h1").text();

    // image, downloaded only when the show_images preference is on (default: on)
    if (PreferenceManager.getDefaultSharedPreferences(context).getBoolean("show_images", true)) {
        Element plaatje = artikel_el.select("img").first();
        if (plaatje != null) {
            try {
                String url = plaatje.attr("src");
                Log.d(TAG, "Downloading " + url);
                InputStream imageStream = (InputStream) new URL(url).getContent();
                try {
                    artikel.plaatje = readBytes(imageStream);
                } finally {
                    // Always release the connection's stream; a failing close must not
                    // discard an image that was read successfully.
                    if (imageStream != null) {
                        try {
                            imageStream.close();
                        } catch (java.io.IOException ignored) {
                            // best effort: nothing useful can be done if close fails
                        }
                    }
                }
                // An image is "big" when it has the class "groot" or explicit dimensions
                // other than 100x100.
                artikel.groot_plaatje = plaatje.hasClass("groot");
                if (plaatje.hasAttr("width") && plaatje.hasAttr("height")
                        && (!plaatje.attr("width").equals("100") || !plaatje.attr("height").equals("100"))) {
                    artikel.groot_plaatje = true;
                }
                if (artikel.groot_plaatje) {
                    Log.i(TAG, "    Done. Big image.");
                } else {
                    Log.i(TAG, "    Done.");
                }
            } catch (Exception ex) {
                // Best effort: the article is still usable without its image.
                Log.w(TAG, "Unable to download image, Falling back... Reason: " + ex.getMessage());
                artikel.plaatje = null;
            }
        }
    }

    // embed: currently only iframes are supported
    if (artikel_el.select("div.embed").first() != null) {
        Element frame = artikel_el.select("div.embed>iframe").first();
        if (frame != null) {
            artikel.embed = frame.attr("src");
        }
    }

    // embed for every domain other than www.geenstijl.nl: the URL is the quoted string
    // preceding ", fall" in the first script element
    if (!domain.equals("www.geenstijl.nl")) {
        Element scriptEl = artikel_el.select("script").first();
        if (scriptEl != null) {
            Matcher matcher = Pattern.compile("'(.*)', fall").matcher(scriptEl.html());
            if (matcher.find()) {
                artikel.embed = matcher.group(1);
            }
        }
    }

    // footer metadata: date, author (text before the first '|'), comment count and link
    Element footer = artikel_el.select("footer").first();
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm", Locale.US);
    artikel.datum = simpleDateFormat.parse(footer.select("time").first().attr("datetime"));

    StringTokenizer footer_items = new StringTokenizer(footer.text(), "|");
    artikel.auteur = footer_items.nextToken().trim();

    artikel.reacties = Integer.parseInt(footer.select("a.comments").text().replace(" reacties", ""));

    artikel.link = footer.select("a").first().attr("href");

    // clean up: drop everything that was extracted above
    artikel_el.select("h1").remove();
    artikel_el.select(".embed").remove();
    artikel_el.select("img").remove();
    artikel_el.select("footer").remove();
    artikel_el.select("a.more").remove();
    artikel_el.select("script").remove();

    // content: the HTML that is left
    artikel.inhoud = artikel_el.html();

    return artikel;
}

From source file:me.vertretungsplan.parser.UntisInfoParser.java

/**
 * Stores the content of one timetable cell in the given substitution.
 *
 * <p>A leading {@code *} is stripped from the cell text, and the placeholders {@code ---} and
 * {@code ---.} are stored as {@code null}. When the text inside {@code strike} elements equals
 * the whole cell text, the value is stored as the previous subject/teacher/room, otherwise as
 * the current one.
 *
 * @param s the substitution to update
 * @param type {@code "subject"}, {@code "teacher"} or {@code "room"}; any other value leaves
 *        {@code s} untouched
 * @param td the table cell to read
 */
private static void parseTimetableCellContent(Substitution s, String type, Element td) {
    final String cellText = td.text();

    String content = cellText.startsWith("*") ? cellText.substring(1) : cellText;
    if (content.equals("---.") || content.equals("---")) {
        content = null;
    }

    // The cell counts as struck out only when the struck text is the entire cell text.
    final boolean struckOut = td.select("strike").text().equals(cellText);

    switch (type) {
    case "subject":
        if (!struckOut) {
            s.setSubject(content);
        } else {
            s.setPreviousSubject(content);
        }
        break;
    case "teacher":
        if (!struckOut) {
            s.setTeacher(content);
        } else {
            s.setPreviousTeacher(content);
        }
        break;
    case "room":
        if (!struckOut) {
            s.setRoom(content);
        } else {
            s.setPreviousRoom(content);
        }
        break;
    }
}

From source file:dsll.pinterest.crawler.Reduce.java

/**
 * Downloads a pin page, extracts its data into a JSON object, stores that object in
 * {@code pinsCollection} and returns it as text.
 *
 * <p>Extracted fields: {@code ID}, {@code image}, {@code name}, {@code source}, {@code credit},
 * {@code comments}, {@code board} and {@code related_pins}. Page fragments that are missing
 * yield empty strings / empty objects; comments without an author link or without content are
 * skipped.
 *
 * @param url the pin URL; the id is the path segment following {@code pin/}, so a URL without
 *        that marker fails with an {@link ArrayIndexOutOfBoundsException}
 * @param pinsCollection the collection the parsed pin is inserted into
 * @return the pin as JSON text, or the text {@code "HTTP connection failed..."} when the page
 *         could not be fetched
 * @throws JSONException if building the JSON object fails
 */
private static Text getPinContent(String url, DBCollection pinsCollection) throws JSONException {
    Document html;
    try {
        html = Jsoup.connect(url).get();
    } catch (Exception e) {
        // Any fetch problem is reported to the caller through the returned text.
        return new Text("HTTP connection failed...");
    }
    JSONObject pin = new JSONObject();

    // Gather major pins data
    Element doc = html.select("body").first();

    // Pin ID
    String id = (url.split("pin/")[1].split("/")[0]);
    pin.append("ID", id);

    // Pin image: stays empty when the wrapper or the image is missing
    String imageURL = "";
    Element imageWrapper = doc.select("div[class=pinImageSourceWrapper]").first();
    if (imageWrapper != null) {
        Element image = imageWrapper.select("div[class=imageContainer]").select("img").first();
        if (image != null) {
            imageURL = image.attr("src");
        }
    }
    pin.append("image", imageURL);

    // Pin name
    Element nameEle = doc.select("h2[itemprop=name]").first();
    String name = "";
    if (nameEle != null) {
        name = nameEle.text().trim();
    }
    pin.append("name", name);

    // Pin source
    Element sourceCont = doc.select("div[class=sourceFlagWrapper]").first();
    JSONObject source = new JSONObject();
    if (sourceCont != null) {
        String title = sourceCont.text().trim();
        // A source block without a link must not abort the whole pin.
        Element sourceLink = sourceCont.select("a").first();
        String src = sourceLink != null ? sourceLink.attr("href") : "";
        source.append("title", title);
        source.append("src", src);
    }
    pin.append("source", source);

    // Pin credit: all three fields stay empty when the credits block is missing
    JSONObject pinCredit = new JSONObject();
    Element credit = doc.select("div[class=pinCredits]").first();
    String creditName = "", creditTitle = "", creditSource = "";
    if (credit != null) {
        creditName = credit.select("div[class=creditName]").text().trim();
        creditTitle = credit.select("div[class=creditTitle]").text().trim();
        creditSource = credit.select("a").attr("href");
    }
    pinCredit.append("name", creditName);
    pinCredit.append("title", creditTitle);
    pinCredit.append("src", creditSource);
    pin.append("credit", pinCredit);

    // Comments
    JSONArray comments = new JSONArray();
    Elements commentsConts = doc.select("div[class=commenterNameCommentText]");
    for (Element commentCont : commentsConts) {
        Element creatorEle = commentCont.select("div[class=commenterWrapper] a").first();
        Element commentContent = commentCont.select(".commentDescriptionContent").first();
        if (creatorEle == null || commentContent == null) {
            // Incomplete comment markup: skip this comment instead of failing the whole pin.
            continue;
        }
        JSONObject comment = new JSONObject();
        comment.append("creator", creatorEle.text().trim());
        comment.append("creator_url", creatorEle.attr("href"));
        comment.append("content", commentContent.text().trim());
        comment.append("content_raw", commentContent.html());
        comments.put(comment);
    }
    pin.append("comments", comments);

    // Pin board link and related pins
    Element sidebar = doc.select("div[class=Module CloseupSidebar]").first();

    // Pin board
    JSONArray board = new JSONArray();
    if (sidebar != null) {
        Element boardEle = sidebar.select("div[class=boardHeader]").first();
        String boardName = "";
        String boardSrc = "";
        if (boardEle != null) {
            boardName = boardEle.select("h3[class=title]").text().trim();
            boardSrc = "https://www.pinterest.com" + boardEle.select("a").attr("href").trim();
        }
        JSONObject b = new JSONObject();
        b.append("name", boardName);
        b.append("src", boardSrc);
        board.put(b);
    }
    pin.append("board", board); //CAUTION: what if a pin shows up in different boards?

    // Related pins
    Element relatedWrapper = doc
            .select("div[class=closeupBottom] div[class=Module CloseupBottom] div[class=relatedPinsWrapper]")
            .first();

    JSONArray relatedPins = new JSONArray();
    if (relatedWrapper != null) {
        Elements relatedPinsConts = relatedWrapper.select("div[class=pinWrapper]");
        for (Element relatedPinsCont : relatedPinsConts) {
            JSONObject relatedPin = new JSONObject();
            relatedPin.append("src", "https://www.pinterest.com"
                    + relatedPinsCont.select("div[class=pinHolder] > a").attr("href"));
            relatedPins.put(relatedPin);
        }
    }
    pin.append("related_pins", relatedPins);

    // Push data to database
    BasicDBObject dbObject = (BasicDBObject) JSON.parse(pin.toString());
    pinsCollection.insert(dbObject);
    return new Text(pin.toString());
}

From source file:me.vertretungsplan.parser.DaVinciParser.java

/**
 * Parses one DaVinci substitution table and adds a {@link Substitution} per data row to the
 * matching day of the schedule.
 *
 * <p>Column meaning is taken from the header cells ({@code thead tr th} or cells with
 * {@code bgcolor=#9999FF}); every other row is treated as a data row whose i-th cell belongs to
 * the i-th header. Empty class, lesson and date cells inherit the value of the previous row.
 *
 * @param table the table element to parse
 * @param v the schedule whose days are searched for (and extended with) the row's date
 * @param klasse when not {@code null}, every substitution is assigned exactly this class,
 *        overriding whatever the table says
 * @param day the day to add substitutions to while rows carry no date of their own or carry
 *        this day's date; may be {@code null}, in which case rows without a date are skipped
 * @param colorProvider supplies the color for each substitution based on its type
 */
static void parseDaVinciTable(Element table, SubstitutionSchedule v, String klasse, SubstitutionScheduleDay day,
        ColorProvider colorProvider) {
    List<String> headers = new ArrayList<>();
    for (Element header : table.select("thead tr th, tr td[bgcolor=#9999FF]")) {
        headers.add(header.text());
    }

    // These three variables carry over from one row to the next: an empty cell in the
    // corresponding column reuses the value of the most recent row that had one.
    Set<String> classes = new HashSet<>();
    String lesson = null;
    LocalDate currentDate = null;

    // "+current (previous)": group 1 is the current value, group 2 the previous one
    Pattern previousCurrentPattern = Pattern.compile("\\+([^\\s]+) \\(([^)]+)\\)");
    // "(previous)": group 1 is the text between the parentheses
    Pattern previousPattern = Pattern.compile("\\(([^)]+)\\)");

    // Data rows: every row that is neither inside thead nor holds a colored header cell
    for (Element row : table.select("tr:not(thead tr, tr:has(td[bgcolor=#9999FF]))")) {
        Substitution subst = new Substitution();
        LocalDate substDate = null;
        Elements columns = row.select("td");
        // NOTE(review): assumes each data row has at least headers.size() td cells;
        // columns.get(i) would presumably fail for shorter rows - confirm
        for (int i = 0; i < headers.size(); i++) {
            // non-breaking spaces are removed so that visually empty cells count as empty
            String value = columns.get(i).text().replace("\u00a0", "");
            String header = headers.get(i);

            if (value.isEmpty()) {
                // Empty cell: fall back to the value remembered from an earlier row
                // (or to the default type "Vertretung") and move on to the next column.
                if (header.equals("Klasse"))
                    subst.setClasses(classes);
                if (header.equals("Pos") || header.equals("Stunde") || header.equals("Std.")) {
                    subst.setLesson(lesson);
                }
                if (header.equals("Art") || header.equals("Merkmal"))
                    subst.setType("Vertretung");
                if (header.equals("Datum"))
                    substDate = currentDate;
                continue;
            }

            Matcher previousCurrentMatcher = previousCurrentPattern.matcher(value);
            Matcher previousMatcher = previousPattern.matcher(value);

            switch (header) {
            case "Klasse":
                // If the cell contains a parenthesised part, only that part is used as class list
                String classesStr = value;
                if (previousMatcher.find()) {
                    classesStr = previousMatcher.group(1);
                }
                classes = new HashSet<>(Arrays.asList(classesStr.split(", ")));
                subst.setClasses(classes);
                break;
            case "Pos":
            case "Stunde":
            case "Std.":
                lesson = value;
                subst.setLesson(lesson);
                break;
            case "VLehrer Krzel":
            case "VLehrer":
            case "Vertreter":
            case "Vertretungslehrkraft":
                // A value starting with '*' is stored as the substitution type, not as a teacher
                if (!value.startsWith("*")) {
                    subst.setTeacher(value);
                } else {
                    subst.setType(value.substring(1));
                }
                break;
            case "Lehrer":
            case "Lehrer Krzel":
            case "Lehrer Name":
            case "Lehrkraft":
                // "+new (old)" sets both teachers; "(old)" or a plain value sets only the previous one
                if (previousCurrentMatcher.find()) {
                    subst.setTeacher(previousCurrentMatcher.group(1));
                    subst.setPreviousTeacher(previousCurrentMatcher.group(2));
                } else if (previousMatcher.find()) {
                    subst.setPreviousTeacher(previousMatcher.group(1));
                } else {
                    subst.setPreviousTeacher(value);
                }
                break;
            case "VFach":
            case "V Fach":
                subst.setSubject(value);
                break;
            case "Fach":
            case "Original Fach":
                // "+new (old)" sets both subjects; anything else is the previous subject
                if (previousCurrentMatcher.find()) {
                    subst.setSubject(previousCurrentMatcher.group(1));
                    subst.setPreviousSubject(previousCurrentMatcher.group(2));
                } else {
                    subst.setPreviousSubject(value);
                }
                break;
            case "VRaum":
            case "V Raum":
                subst.setRoom(value);
                break;
            case "Raum":
            case "Original Raum":
                // "+new (old)" sets both rooms; anything else is the previous room
                if (previousCurrentMatcher.find()) {
                    subst.setRoom(previousCurrentMatcher.group(1));
                    subst.setPreviousRoom(previousCurrentMatcher.group(2));
                } else {
                    subst.setPreviousRoom(value);
                }
                break;
            case "Art":
            case "Merkmal":
                subst.setType(value);
                break;
            case "Info":
            case "Mitteilung":
                subst.setDesc(value);
                break;
            case "Datum":
                substDate = ParserUtils.parseDate(value);
                currentDate = substDate;
                break;
            }
        }
        // An explicitly given class replaces whatever was read from the table
        if (klasse != null) {
            Set<String> fixedClasses = new HashSet<>();
            fixedClasses.add(klasse);
            subst.setClasses(fixedClasses);
        }
        // No type column (or no value): try to recognize it from the description,
        // falling back to "Vertretung"
        if (subst.getType() == null) {
            String recognizedType = null;
            if (subst.getDesc() != null)
                recognizedType = recognizeType(subst.getDesc());
            subst.setType(recognizedType != null ? recognizedType : "Vertretung");
        }
        subst.setColor(colorProvider.getColor(subst.getType()));

        // Without a date and without a current day there is nowhere to put this row
        if (substDate == null && day == null)
            continue;

        // Switch days when there is no current day or the row's date differs from it:
        // reuse a day of the schedule with the same date, otherwise create and register one.
        if (day == null || substDate != null && !substDate.equals(day.getDate())) {
            day = null;
            for (SubstitutionScheduleDay d : v.getDays()) {
                if (d.getDate().equals(substDate)) {
                    day = d;
                }
            }
            if (day == null) {
                day = new SubstitutionScheduleDay();
                day.setDate(substDate);
                v.addDay(day);
            }
        }

        day.addSubstitution(subst);

    }
}

From source file:me.vertretungsplan.parser.UntisCommonParser.java

/**
 * Extracts the "last change" information from a mon_head table.
 *
 * <p>The first right-aligned cell is searched for a timestamp of the form
 * {@code dd.MM.yyyy HH:mm}. If none is found but the table text contains {@code "Stand: "},
 * everything after {@code "Stand:"} is returned, trimmed.
 *
 * @param monHead the mon_head table element
 * @return the timestamp or trailing text, or {@code null} when there is no right-aligned cell
 *         or neither form is present
 */
private static String findLastChangeFromMonHeadTable(Element monHead) {
    Element rightAlignedCell = monHead.select("td[align=right]").first();
    if (rightAlignedCell == null) {
        return null;
    }

    Matcher timestamp = Pattern.compile("\\d\\d\\.\\d\\d\\.\\d\\d\\d\\d \\d\\d:\\d\\d")
            .matcher(rightAlignedCell.text());
    if (timestamp.find()) {
        return timestamp.group();
    }

    // No timestamp in the cell: fall back to whatever follows the "Stand:" marker.
    String headText = monHead.text();
    if (!headText.contains("Stand: ")) {
        return null;
    }
    String marker = "Stand:";
    return headText.substring(headText.indexOf(marker) + marker.length()).trim();
}

From source file:com.itcs.commons.email.EmailAutoconfigClient.java

/**
 * Copies the incoming-server settings of the requested type from an autoconfig document into
 * {@code settings}.
 *
 * <p>For every {@code incomingServer} element whose {@code type} attribute equals {@code type},
 * the host name, port and SSL flag ({@code "true"} when the socket type is {@code SSL},
 * otherwise {@code "false"}) are stored; if several elements match, later ones overwrite
 * earlier ones.
 *
 * @param doc the parsed autoconfig document
 * @param settings the map receiving the inbound server, port and SSL entries
 * @param type the server type to look for
 */
private static void extractIncommingServerSettings(Document doc, Map<String, String> settings, String type) {
    for (Element server : doc.select("incomingServer")) {
        if (!server.attr("type").equals(type)) {
            continue;
        }
        String host = server.select("hostname").text();
        settings.put(EnumEmailSettingKeys.INBOUND_SERVER.getKey(), host);

        String port = server.select("port").text();
        settings.put(EnumEmailSettingKeys.INBOUND_PORT.getKey(), port);

        boolean sslEnabled = server.select("socketType").text().trim().equals("SSL");
        settings.put(EnumEmailSettingKeys.INBOUND_SSL_ENABLED.getKey(), String.valueOf(sslEnabled));
    }
}