Example usage for org.jsoup.nodes Element child

List of usage examples for org.jsoup.nodes Element child

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.child.

Prototype

public Element child(int index) 

Source Link

Document

Get a child element of this element, by its 0-based index number.

Usage

From source file:de.geeksfactory.opacclient.apis.Zones.java

/**
 * Parses the HTML of a detail page into a {@link DetailledItem}.
 *
 * <p>The method fills the result from several parts of the document:
 * <ul>
 * <li>label/value table rows (selector depends on {@code version18}); the first row labelled
 * "Titel" sets the title, other rows with more than one cell become {@link Detail}s,</li>
 * <li>{@code a.SummaryActionLink} links whose text contains "Vormerken" (reservation info),</li>
 * <li>{@code div.record-item-new} blocks, flattened to text with {@code <br>} as line break,</li>
 * <li>an optional {@code span.z3988} whose {@code title} attribute holds {@code &}-separated
 * {@code name=value} pairs,</li>
 * <li>the cover image and the {@code div[id^=stock_]} copy blocks.</li>
 * </ul>
 *
 * @param id   the id that is stored on the returned item
 * @param html the HTML source of the detail page
 * @return the populated item; its title is the empty string if none was found
 */
private DetailledItem parse_result(String id, String html) {
    Document doc = Jsoup.parse(html);

    DetailledItem result = new DetailledItem();
    result.setTitle("");
    boolean title_is_set = false;

    result.setId(id);

    String detailTrsQuery = version18 ? ".inRoundBox1 table table tr"
            : ".DetailDataCell table table:not(.inRecordHeader) tr";
    Elements detailtrs1 = doc.select(detailTrsQuery);
    for (int i = 0; i < detailtrs1.size(); i++) {
        Element tr = detailtrs1.get(i);
        int s = tr.children().size();
        if (s == 0) {
            // A row without cells has neither label nor value; tr.child(0) would throw
            // IndexOutOfBoundsException and abort the whole parse.
            continue;
        }
        if (tr.child(0).text().trim().equals("Titel") && !title_is_set) {
            // The value is always taken from the last cell of the row.
            result.setTitle(tr.child(s - 1).text().trim());
            title_is_set = true;
        } else if (s > 1) {
            Element valchild = tr.child(s - 1);
            // Cells that contain a nested table are skipped.
            if (valchild.select("table").isEmpty()) {
                String val = valchild.text().trim();
                if (val.length() > 0) {
                    result.addDetail(new Detail(tr.child(0).text().trim(), val));
                }
            }
        }
    }

    for (Element a : doc.select("a.SummaryActionLink")) {
        if (a.text().contains("Vormerken")) {
            result.setReservable(true);
            result.setReservation_info(a.attr("href"));
        }
    }

    Elements detaildiv = doc.select("div.record-item-new");
    if (!detaildiv.isEmpty()) {
        for (int i = 0; i < detaildiv.size(); i++) {
            Element dd = detaildiv.get(i);
            // Flatten the direct children to plain text: text nodes are copied verbatim,
            // <br> becomes a newline, any other element contributes its trimmed text.
            String text = "";
            for (Node node : dd.childNodes()) {
                if (node instanceof TextNode) {
                    String snip = ((TextNode) node).text();
                    if (snip.length() > 0) {
                        text += snip;
                    }
                } else if (node instanceof Element) {
                    if (((Element) node).tagName().equals("br")) {
                        text += "\n";
                    } else {
                        String snip = ((Element) node).text().trim();
                        if (snip.length() > 0) {
                            text += snip;
                        }
                    }
                }
            }
            result.addDetail(new Detail("", text));
        }
    }

    if (doc.select("span.z3988").size() > 0) {
        // Sometimes there is a <span class="Z3988"> item which provides
        // data in a standardized format.
        // NOTE(review): the values are used as found in the attribute; if they are
        // URL-encoded they are not decoded here - confirm whether that is intended.
        String z3988data = doc.select("span.z3988").first().attr("title").trim();
        for (String pair : z3988data.split("&")) {
            String[] nv = pair.split("=", 2);
            if (nv.length == 2) {
                if (!nv[1].trim().equals("")) {
                    // Titles from this block are only a fallback when no title was found above.
                    if (nv[0].equals("rft.btitle") && result.getTitle().length() == 0) {
                        result.setTitle(nv[1]);
                    } else if (nv[0].equals("rft.atitle") && result.getTitle().length() == 0) {
                        result.setTitle(nv[1]);
                    } else if (nv[0].equals("rft.au")) {
                        result.addDetail(new Detail("Author", nv[1]));
                    }
                }
            }
        }
    }

    // Cover
    if (doc.select(".BookCover, .LargeBookCover").size() > 0) {
        result.setCover(doc.select(".BookCover, .LargeBookCover").first().attr("src"));
    }

    Elements copydivs = doc.select("div[id^=stock_]");
    // Text of the most recent "stock_head" div; used as branch for the copies that follow it.
    String pop = "";
    for (int i = 0; i < copydivs.size(); i++) {
        Element div = copydivs.get(i);

        if (div.attr("id").startsWith("stock_head")) {
            pop = div.text().trim();
            continue;
        }

        Copy copy = new Copy();
        DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);

        // This is getting very ugly - check if it is valid for libraries which are not Hamburg.
        // Seems to also work in Kiel (Zones 1.8, checked 10.10.2015)
        // j counts the child nodes seen since the last <br>; the position decides which
        // field a node is written to.
        // NOTE(review): the same Copy instance is reused after each <br>, so a div with
        // several <br> elements adds the same object more than once - confirm that is intended.
        int j = 0;
        for (Node node : div.childNodes()) {
            try {
                if (node instanceof Element) {
                    if (((Element) node).tag().getName().equals("br")) {
                        copy.setBranch(pop);
                        result.addCopy(copy);
                        // Becomes 0 again through the j++ below.
                        j = -1;
                    } else if (((Element) node).tag().getName().equals("b") && j == 1) {
                        copy.setLocation(((Element) node).text());
                    } else if (((Element) node).tag().getName().equals("b") && j > 1) {
                        copy.setStatus(((Element) node).text());
                    }
                    j++;
                } else if (node instanceof TextNode) {
                    if (j == 0) {
                        copy.setDepartment(((TextNode) node).text());
                    }
                    if (j == 2) {
                        copy.setBarcode(((TextNode) node).getWholeText().trim().split("\n")[0].trim());
                    }
                    if (j == 6) {
                        // The last ten characters are parsed as a dd.MM.yyyy date. Shorter
                        // text makes substring throw, which the outer catch reports.
                        String text = ((TextNode) node).text().trim();
                        String date = text.substring(text.length() - 10);
                        try {
                            copy.setReturnDate(fmt.parseLocalDate(date));
                        } catch (IllegalArgumentException e) {
                            e.printStackTrace();
                        }
                    }
                    j++;
                }
            } catch (Exception e) {
                // Best effort: a node that cannot be interpreted must not abort the parse.
                e.printStackTrace();
            }
        }
    }

    return result;
}

From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java

/**
 * Parses the HTML of a detail page into a {@link DetailledItem}.
 *
 * <p>Reads the cover, the title, the label/value rows below {@code .detailzeile}, the centered
 * detail links, the copies table ({@code .exemplartab}), the volumes ({@code table .tabBand a})
 * and the reservation link.
 *
 * <p>The column layout of the copies table is taken from {@code data.copiestable} when that
 * key exists; otherwise it is derived from the table's header texts.
 *
 * @param html the HTML source of the detail page
 * @return the populated item
 */
protected DetailledItem parse_result(String html) {
    Document doc = Jsoup.parse(html);
    // Needed so that absUrl("href") below can resolve relative links.
    doc.setBaseUri(opac_url);

    DetailledItem result = new DetailledItem();

    if (doc.select(".detail_cover img").size() == 1) {
        result.setCover(doc.select(".detail_cover img").get(0).attr("src"));
    }

    result.setTitle(doc.select(".detail_titel").text());

    Elements detailtrs = doc.select(".detailzeile table tr");
    for (int i = 0; i < detailtrs.size(); i++) {
        Element tr = detailtrs.get(i);
        if (tr.children().size() < 2) {
            // A detail row needs a label cell and a value cell. Without this check
            // child(0)/child(1) throw IndexOutOfBoundsException and abort the whole parse.
            continue;
        }
        if (tr.child(0).hasClass("detail_feld")) {
            String title = tr.child(0).text();
            String content = tr.child(1).text();
            if (title.equals("Gesamtwerk:") || title.equals("Erschienen in:")) {
                // These rows are not added as details; only the "MedienNr" query parameter
                // of their first link is kept as collection id.
                try {
                    if (tr.child(1).select("a").size() > 0) {
                        Element link = tr.child(1).select("a").first();
                        List<NameValuePair> query = URLEncodedUtils.parse(new URI(link.absUrl("href")),
                                "UTF-8");
                        for (NameValuePair q : query) {
                            if (q.getName().equals("MedienNr")) {
                                result.setCollectionId(q.getValue());
                            }
                        }
                    }
                } catch (URISyntaxException e) {
                    // Deliberately ignored: a malformed link only means that no collection
                    // id can be extracted from this row.
                }
            } else {

                if (content.contains("hier klicken") && tr.child(1).select("a").size() > 0) {
                    // "click here" text is useless without its target, so append the href.
                    content += " " + tr.child(1).select("a").first().attr("href");
                }

                result.addDetail(new Detail(title, content));
            }
        }
    }

    Elements detailcenterlinks = doc.select(".detailzeile_center a.detail_link");
    for (int i = 0; i < detailcenterlinks.size(); i++) {
        Element a = detailcenterlinks.get(i);
        result.addDetail(new Detail(a.text().trim(), a.absUrl("href")));
    }

    try {
        // Maps a copy field name to the index of the table column that holds it.
        JSONObject copymap = new JSONObject();
        if (data.has("copiestable")) {
            copymap = data.getJSONObject("copiestable");
        } else {
            Elements ths = doc.select(".exemplartab .exemplarmenubar th");
            for (int i = 0; i < ths.size(); i++) {
                Element th = ths.get(i);
                String head = th.text().trim();
                if (head.equals("Zweigstelle")) {
                    copymap.put("branch", i);
                } else if (head.equals("Abteilung")) {
                    copymap.put("department", i);
                } else if (head.equals("Bereich") || head.equals("Standort")) {
                    copymap.put("location", i);
                } else if (head.equals("Signatur")) {
                    copymap.put("signature", i);
                } else if (head.equals("Barcode") || head.equals("Medien-Nummer")) {
                    copymap.put("barcode", i);
                } else if (head.equals("Status")) {
                    copymap.put("status", i);
                } else if (head.equals("Frist") || head.matches("Verf.+gbar")) {
                    copymap.put("returndate", i);
                } else if (head.equals("Vorbestellungen") || head.equals("Reservierungen")) {
                    copymap.put("reservations", i);
                }
            }
        }
        Elements exemplartrs = doc.select(".exemplartab .tabExemplar, .exemplartab .tabExemplar_");
        DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
        for (int i = 0; i < exemplartrs.size(); i++) {
            Element tr = exemplartrs.get(i);

            Copy copy = new Copy();

            Iterator<?> keys = copymap.keys();
            while (keys.hasNext()) {
                String key = (String) keys.next();
                int index;
                try {
                    index = copymap.has(key) ? copymap.getInt(key) : -1;
                } catch (JSONException e1) {
                    // A non-integer entry is treated like a missing column.
                    index = -1;
                }
                if (index >= 0) {
                    try {
                        copy.set(key, tr.child(index).text(), fmt);
                    } catch (IllegalArgumentException e) {
                        e.printStackTrace();
                    }
                }
            }

            result.addCopy(copy);
        }
    } catch (Exception e) {
        // Best effort: a broken copies table must not prevent returning the other data.
        e.printStackTrace();
    }

    try {
        Elements bandtrs = doc.select("table .tabBand a");
        for (int i = 0; i < bandtrs.size(); i++) {
            Element tr = bandtrs.get(i);

            Volume volume = new Volume();
            // The id is the text between the first and the second "=" of the href.
            // NOTE(review): an href without "=" throws here and ends the loop, so the
            // remaining volumes are dropped - confirm that is acceptable.
            volume.setId(tr.attr("href").split("=")[1]);
            volume.setTitle(tr.text());
            result.addVolume(volume);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }

    if (doc.select(".detail_vorbest a").size() == 1) {
        result.setReservable(true);
        result.setReservation_info(doc.select(".detail_vorbest a").attr("href"));
    }
    return result;
}

From source file:org.abondar.experimental.eventsearch.EventFinder.java

/**
 * Downloads the page of one event, extracts its data into an {@link Event} and passes that
 * object to {@code geoCode} and {@code formJson}.
 *
 * <p>An {@link IOException} while loading the page is logged and otherwise ignored.
 *
 * @param eventId the event id; it is inserted into the page URL and stored on the event
 * @param evType  key used to look up the category in {@code eventTypes}
 */
public void getEvent(String eventId, String evType) {
    try {

        Document dc = Jsoup.connect("https://afisha.yandex.ru/msk/events/" + eventId + "/").get();

        Event eb = new Event();
        eb.setEventID(eventId);
        eb.setCategory(eventTypes.get(evType));
        Elements elems = dc.select("meta");

        for (Element e : elems) {
            if (e.attributes().get("property").contains("og:description")) {
                eb.setDescription(e.attributes().get("content"));
            }
        }

        elems = dc.select("title");

        for (Element e : elems) {
            // NOTE(review): indexOf("") is always 0, so this always stores an empty name.
            // Presumably the separator character was lost somewhere; it is left untouched
            // because the intended delimiter cannot be determined from this code.
            eb.setName(e.html().substring(0, e.html().indexOf("")));
        }

        elems = dc.select("a[href]");

        for (Element e : elems) {
            for (Attribute attr : e.attributes().asList()) {
                if (attr.getValue().contains("/msk/places/")) {
                    eb.setPlace(getEventPlaces(attr.getValue()));
                }
            }
        }

        elems = dc.select("tr[id]");

        for (Element e : elems) {
            for (Attribute attr : e.attributes().asList()) {

                if (attr.getValue().contains("f")) {

                    Element firstCell = e.children().first();
                    if (firstCell == null) {
                        // A row without cells carries neither date nor time.
                        break;
                    }
                    eb.setDate(firstCell.html());

                    // The time sits four levels below the second cell, or below the third
                    // cell when the second one does not have that structure.
                    String time = nestedCellHtml(e, 1);
                    if (time == null) {
                        time = nestedCellHtml(e, 2);
                    }
                    if (time != null) {
                        eb.setTime(time);
                    }
                }
            }
        }

        geoCode(eb);
        formJson(eb);

    } catch (IOException ex) {
        Logger.getLogger(EventFinder.class.getName()).log(Level.SEVERE, null, ex);
    }

}

/**
 * Returns the inner HTML of the element reached from the child of {@code row} at
 * {@code cellIndex} by following the first child four levels down.
 *
 * @param row       the table row to inspect
 * @param cellIndex 0-based index of the cell to start from
 * @return the inner HTML, or {@code null} if the cell or one of the levels does not exist
 */
private String nestedCellHtml(Element row, int cellIndex) {
    if (cellIndex >= row.children().size()) {
        return null;
    }
    Element current = row.child(cellIndex);
    for (int depth = 0; depth < 4; depth++) {
        // Elements.first() yields null for an empty list.
        current = current.children().first();
        if (current == null) {
            return null;
        }
    }
    return current.html();
}

From source file:me.vertretungsplan.parser.UntisCommonParser.java

/**
 * Parses an Untis substitution schedule table
 *
 * <p>Two layouts are handled. If the "class in extra line" flag is set in <code>data</code>,
 * every <code>td.inline_header</code> cell names a class and the rows following it belong to
 * that class. Otherwise every data row is parsed on its own and the class is read from a
 * "class" column (or <code>defaultClass</code>). In both layouts the meaning of each column is
 * looked up by position in the <code>PARAM_COLUMNS</code> array of <code>data</code>.
 *
 * @param table        the <code>table</code> Element from the HTML document
 * @param data         {@link SubstitutionScheduleData#getData()}
 * @param day          the {@link SubstitutionScheduleDay} where the substitutions will be stored
 * @param defaultClass the class that should be set if there is no class column in the table
 */
private void parseSubstitutionScheduleTable(Element table, JSONObject data, SubstitutionScheduleDay day,
        String defaultClass) throws JSONException, CredentialInvalidException {
    if (data.optBoolean(PARAM_CLASS_IN_EXTRA_LINE) || data.optBoolean("class_in_extra_line")) { // backwards compatibility
        for (Element element : table.select("td.inline_header")) {
            String className = getClassName(element.text(), data);
            if (isValidClass(className)) {
                Element zeile = null;
                try {
                    // Start with the row after the one that contains the header cell.
                    zeile = element.parent().nextElementSibling();
                    // NOTE(review): the result of select() is compared with null here;
                    // presumably an emptiness check was intended - confirm.
                    if (zeile.select("td") == null) {
                        zeile = zeile.nextElementSibling();
                    }
                    // Number of following rows that were already merged into the current one.
                    int skipLines = 0;
                    // Walk the rows until the next inline header (or the end of the table).
                    while (zeile != null && !zeile.select("td").attr("class").equals("list inline_header")) {
                        if (skipLines > 0) {
                            skipLines--;
                            zeile = zeile.nextElementSibling();
                            continue;
                        }

                        Substitution v = new Substitution();

                        // Index of the current cell; selects the column type below.
                        int i = 0;
                        for (Element spalte : zeile.select("td")) {
                            String text = spalte.text();
                            if (isEmpty(text)) {
                                i++;
                                continue;
                            }

                            // A following row continues this cell if it has the same number
                            // of cells and its whole text equals the text of the cell in
                            // this column (non-breaking spaces ignored). Such rows are
                            // appended to the text and skipped later.
                            int skipLinesForThisColumn = 0;
                            Element nextLine = zeile.nextElementSibling();
                            boolean continueSkippingLines = true;
                            while (continueSkippingLines) {
                                if (nextLine != null && nextLine.children().size() == zeile.children().size()) {
                                    Element columnInNextLine = nextLine.child(spalte.elementSiblingIndex());
                                    if (columnInNextLine.text().replaceAll("\u00A0", "").trim()
                                            .equals(nextLine.text().replaceAll("\u00A0", "").trim())) {
                                        // Continued in the next line
                                        text += " " + columnInNextLine.text();
                                        skipLinesForThisColumn++;
                                        nextLine = nextLine.nextElementSibling();
                                    } else {
                                        continueSkippingLines = false;
                                    }
                                } else {
                                    continueSkippingLines = false;
                                }
                            }
                            // Keep the largest continuation count of all cells in this row.
                            if (skipLinesForThisColumn > skipLines)
                                skipLines = skipLinesForThisColumn;

                            String type = data.getJSONArray(PARAM_COLUMNS).getString(i);

                            switch (type) {
                            case "lesson":
                                v.setLesson(text);
                                break;
                            case "subject":
                                handleSubject(v, spalte);
                                break;
                            case "previousSubject":
                                v.setPreviousSubject(text);
                                break;
                            case "type":
                                v.setType(text);
                                v.setColor(colorProvider.getColor(text));
                                break;
                            case "type-entfall":
                                // An "x" marks a cancelled lesson, anything else a substitution.
                                if (text.equals("x")) {
                                    v.setType("Entfall");
                                    v.setColor(colorProvider.getColor("Entfall"));
                                } else {
                                    v.setType("Vertretung");
                                    v.setColor(colorProvider.getColor("Vertretung"));
                                }
                                break;
                            case "room":
                                handleRoom(v, spalte);
                                break;
                            case "teacher":
                                handleTeacher(v, spalte, data);
                                break;
                            case "previousTeacher":
                                v.setPreviousTeachers(splitTeachers(text, data));
                                break;
                            case "desc":
                                v.setDesc(text);
                                break;
                            case "desc-type":
                                // The description doubles as the source for the type.
                                v.setDesc(text);
                                String recognizedType = recognizeType(text);
                                v.setType(recognizedType);
                                v.setColor(colorProvider.getColor(recognizedType));
                                break;
                            case "previousRoom":
                                v.setPreviousRoom(text);
                                break;
                            case "substitutionFrom":
                                v.setSubstitutionFrom(text);
                                break;
                            case "teacherTo":
                                v.setTeacherTo(text);
                                break;
                            case "ignore":
                                break;
                            case "date": // used by UntisSubstitutionParser
                                break;
                            default:
                                throw new IllegalArgumentException("Unknown column type: " + type);
                            }
                            i++;
                        }

                        autoDetectType(data, zeile, v);

                        v.getClasses().add(className);

                        // Rows without a lesson are not stored.
                        if (v.getLesson() != null && !v.getLesson().equals("")) {
                            day.addSubstitution(v);
                        }

                        zeile = zeile.nextElementSibling();

                    }
                } catch (Throwable e) {
                    // Any failure ends the parsing of this class only; the loop goes on
                    // with the next inline header.

                    e.printStackTrace();
                }
            }
        }
    } else {
        // Remember whether a dedicated "type" column is configured; if so, "type-entfall"
        // must not set the type to "Vertretung".
        boolean hasType = false;
        for (int i = 0; i < data.getJSONArray(PARAM_COLUMNS).length(); i++) {
            if (data.getJSONArray(PARAM_COLUMNS).getString(i).equals("type")) {
                hasType = true;
            }
        }
        // Number of upcoming rows that were already merged into a previous one.
        int skipLines = 0;
        for (Element zeile : table.select("tr.list.odd:not(:has(td.inline_header)), "
                + "tr.list.even:not(:has(td.inline_header)), " + "tr:has(td[align=center]):gt(0)")) {
            if (skipLines > 0) {
                skipLines--;
                continue;
            }

            Substitution v = new Substitution();
            String klassen = defaultClass != null ? defaultClass : "";
            // Index of the current cell; selects the column type below.
            int i = 0;
            for (Element spalte : zeile.select("td")) {
                String text = spalte.text();

                String type = data.getJSONArray(PARAM_COLUMNS).getString(i);
                // Empty cells are skipped, except for "type-entfall" where an empty cell
                // carries meaning (see the switch below).
                if (isEmpty(text) && !type.equals("type-entfall")) {
                    i++;
                    continue;
                }

                // Same continuation-row detection as in the branch above: a following row
                // with the same number of cells whose whole text equals the text of the
                // cell in this column is appended to the text and skipped later.
                int skipLinesForThisColumn = 0;
                Element nextLine = zeile.nextElementSibling();
                boolean continueSkippingLines = true;
                while (continueSkippingLines) {
                    if (nextLine != null && nextLine.children().size() == zeile.children().size()) {
                        Element columnInNextLine = nextLine.child(spalte.elementSiblingIndex());
                        if (columnInNextLine.text().replaceAll("\u00A0", "").trim()
                                .equals(nextLine.text().replaceAll("\u00A0", "").trim())) {
                            // Continued in the next line
                            text += " " + columnInNextLine.text();
                            skipLinesForThisColumn++;
                            nextLine = nextLine.nextElementSibling();
                        } else {
                            continueSkippingLines = false;
                        }
                    } else {
                        continueSkippingLines = false;
                    }
                }
                if (skipLinesForThisColumn > skipLines)
                    skipLines = skipLinesForThisColumn;

                switch (type) {
                case "lesson":
                    v.setLesson(text);
                    break;
                case "subject":
                    handleSubject(v, spalte);
                    break;
                case "previousSubject":
                    v.setPreviousSubject(text);
                    break;
                case "type":
                    v.setType(text);
                    v.setColor(colorProvider.getColor(text));
                    break;
                case "type-entfall":
                    if (text.equals("x")) {
                        v.setType("Entfall");
                        v.setColor(colorProvider.getColor("Entfall"));
                    } else if (!hasType) {
                        v.setType("Vertretung");
                        v.setColor(colorProvider.getColor("Vertretung"));
                    }
                    break;
                case "room":
                    handleRoom(v, spalte);
                    break;
                case "previousRoom":
                    v.setPreviousRoom(text);
                    break;
                case "desc":
                    v.setDesc(text);
                    break;
                case "desc-type":
                    v.setDesc(text);
                    String recognizedType = recognizeType(text);
                    v.setType(recognizedType);
                    v.setColor(colorProvider.getColor(recognizedType));
                    break;
                case "teacher":
                    handleTeacher(v, spalte, data);
                    break;
                case "previousTeacher":
                    v.setPreviousTeachers(splitTeachers(text, data));
                    break;
                case "substitutionFrom":
                    v.setSubstitutionFrom(text);
                    break;
                case "teacherTo":
                    v.setTeacherTo(text);
                    break;
                case "class":
                    klassen = getClassName(text, data);
                    break;
                case "ignore":
                    break;
                case "date": // used by UntisSubstitutionParser
                    break;
                default:
                    throw new IllegalArgumentException("Unknown column type: " + type);
                }
                i++;
            }

            // Rows without a lesson are not stored.
            if (v.getLesson() == null || v.getLesson().equals("")) {
                continue;
            }

            autoDetectType(data, zeile, v);

            List<String> affectedClasses;

            // Detect things like "7"
            Pattern singlePattern = Pattern.compile("(\\d+)");
            Matcher singleMatcher = singlePattern.matcher(klassen);

            // Detect things like "5-12"
            Pattern rangePattern = Pattern.compile("(\\d+) ?- ?(\\d+)");
            Matcher rangeMatcher = rangePattern.matcher(klassen);

            // Extracts the leading number of a class name
            Pattern pattern2 = Pattern.compile("^(\\d+).*");

            if (rangeMatcher.matches()) {
                // A range of numbers: all known classes whose leading number lies within it.
                affectedClasses = new ArrayList<>();
                int min = Integer.parseInt(rangeMatcher.group(1));
                int max = Integer.parseInt(rangeMatcher.group(2));
                try {
                    for (String klasse : getAllClasses()) {
                        Matcher matcher2 = pattern2.matcher(klasse);
                        if (matcher2.matches()) {
                            int num = Integer.parseInt(matcher2.group(1));
                            if (min <= num && num <= max)
                                affectedClasses.add(klasse);
                        }
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                }
            } else if (singleMatcher.matches()) {
                // A single number: all known classes whose leading number equals it.
                affectedClasses = new ArrayList<>();
                int grade = Integer.parseInt(singleMatcher.group(1));
                try {
                    for (String klasse : getAllClasses()) {
                        Matcher matcher2 = pattern2.matcher(klasse);
                        if (matcher2.matches() && grade == Integer.parseInt(matcher2.group(1))) {
                            affectedClasses.add(klasse);
                        }
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                }
            } else {
                // Both keys default to true, so the split is used unless one of them is
                // explicitly set to false.
                if (data.optBoolean(PARAM_CLASSES_SEPARATED, true)
                        && data.optBoolean("classes_separated", true)) { // backwards compatibility
                    affectedClasses = Arrays.asList(klassen.split(", "));
                } else {
                    affectedClasses = new ArrayList<>();
                    try {
                        for (String klasse : getAllClasses()) { // TODO: is there a better way?
                            // Build a regex that requires the characters of the class name
                            // in order, each followed by arbitrary text. Only '?' is escaped.
                            StringBuilder regex = new StringBuilder();
                            for (char character : klasse.toCharArray()) {
                                if (character == '?') {
                                    regex.append("\\?");
                                } else {
                                    regex.append(character);
                                }
                                regex.append(".*");
                            }
                            if (klassen.matches(regex.toString())) {
                                affectedClasses.add(klasse);
                            }
                        }
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }

            for (String klasse : affectedClasses) {
                if (isValidClass(klasse)) {
                    v.getClasses().add(klasse);
                }
            }

            if (data.optBoolean(PARAM_MERGE_WITH_DIFFERENT_TYPE, false)) {
                // If an entry that equals this one apart from its type already exists, do
                // not add a second one; "Vertretung" overrides the existing entry's type.
                boolean found = false;
                for (Substitution subst : day.getSubstitutions()) {
                    if (subst.equalsExcludingType(v)) {
                        found = true;

                        if (v.getType().equals("Vertretung")) {
                            subst.setType("Vertretung");
                            subst.setColor(colorProvider.getColor("Vertretung"));
                        }

                        break;
                    }
                }
                if (!found) {
                    day.addSubstitution(v);
                }
            } else {
                day.addSubstitution(v);
            }
        }
    }
}

From source file:com.astamuse.asta4d.render.RenderUtil.java

/**
 * Applies the renderers {@code rendererList[startIndex .. count-1]} to {@code target}, one
 * renderer per recursion step.
 *
 * <p>For the current renderer the matching elements are collected, every transformer of the
 * renderer is invoked on each of them, the result is inserted before the element and the
 * element itself is removed. The method then recurses with {@code startIndex + 1}.
 *
 * @param target       the element the renderers are applied to
 * @param rendererList the renderers to apply
 * @param renderAction passed to RENDER_ACTION renderers and asked whether a missing selector
 *                     should be logged
 * @param startIndex   index of the renderer handled by this call
 * @param count        exclusive upper bound for {@code startIndex}
 */
private final static void apply(Element target, List<Renderer> rendererList, RenderAction renderAction,
        int startIndex, int count) {

    // The renderer list have to be applied recursively because the
    // transformer will always return a new Element clone.

    if (startIndex >= count) {
        return;
    }

    final Renderer currentRenderer = rendererList.get(startIndex);

    RendererType rendererType = currentRenderer.getRendererType();

    // Renderer types that do not transform elements are handled here and return early.
    switch (rendererType) {
    case GO_THROUGH:
        apply(target, rendererList, renderAction, startIndex + 1, count);
        return;
    /*
    case DEBUG:
    currentRenderer.getTransformerList().get(0).invoke(target);
    apply(target, rendererList, renderAction, startIndex + 1, count);
    return;
    */
    case RENDER_ACTION:
        ((RenderActionRenderer) currentRenderer).getStyle().apply(renderAction);
        apply(target, rendererList, renderAction, startIndex + 1, count);
        return;
    default:
        // do nothing
        break;
    }

    String selector = currentRenderer.getSelector();
    List<Transformer<?>> transformerList = currentRenderer.getTransformerList();

    // The pseudo root selector addresses the target itself instead of its descendants.
    List<Element> elemList;
    if (PSEUDO_ROOT_SELECTOR.equals(selector)) {
        elemList = new LinkedList<Element>();
        elemList.add(target);
    } else {
        elemList = new ArrayList<>(target.select(selector));
    }

    if (elemList.isEmpty()) {
        if (rendererType == RendererType.ELEMENT_NOT_FOUND_HANDLER) {
            // Nothing matched: render the target with the handler's alternative renderer.
            // NOTE(review): this clears and refills the list returned by
            // getTransformerList(); if that is the renderer's own list the change
            // persists beyond this call - confirm.
            elemList.add(target);
            transformerList.clear();
            transformerList.add(
                    new RendererTransformer(((ElementNotFoundHandler) currentRenderer).alternativeRenderer()));
        } else if (renderAction.isOutputMissingSelectorWarning()) {
            String creationInfo = currentRenderer.getCreationSiteInfo();
            if (creationInfo == null) {
                creationInfo = "";
            } else {
                creationInfo = " at [ " + creationInfo + " ]";
            }
            logger.warn(
                    "There is no element found for selector [{}]{}, if it is deserved, try Renderer#disableMissingSelectorWarning() "
                            + "to disable this message and Renderer#enableMissingSelectorWarning could enable this warning again in "
                            + "your renderer chain",
                    selector, creationInfo);
            apply(target, rendererList, renderAction, startIndex + 1, count);
            return;
        }
        // Otherwise fall through with an empty list: the loop below does nothing and the
        // next renderer is applied.

    } else {
        // Something matched, so a "not found" handler has nothing to do.
        if (rendererType == RendererType.ELEMENT_NOT_FOUND_HANDLER) {
            apply(target, rendererList, renderAction, startIndex + 1, count);
            return;
        }
    }

    // Set when the target itself is among the elements to process; it is handled last.
    Element delayedElement = null;
    Element resultNode;
    // TODO we suppose that the element is listed as the order from parent
    // to children, so we reverse it. Perhaps we need a real order process
    // to ensure the wanted order.
    Collections.reverse(elemList);
    boolean renderForRoot;
    for (Element elem : elemList) {
        renderForRoot = PSEUDO_ROOT_SELECTOR.equals(selector)
                || rendererType == RendererType.ELEMENT_NOT_FOUND_HANDLER;
        if (!renderForRoot) {
            // faked group node will be not applied by renderers(only when the current selector is not the pseudo :root)
            if (elem.tagName().equals(ExtNodeConstants.GROUP_NODE_TAG)
                    && ExtNodeConstants.GROUP_NODE_ATTR_TYPE_FAKE
                            .equals(elem.attr(ExtNodeConstants.GROUP_NODE_ATTR_TYPE))) {
                continue;
            }
        }

        if (elem == target) {
            delayedElement = elem;
            continue;
        }
        // Insert each transformer result before the element, then drop the element.
        for (Transformer<?> transformer : transformerList) {
            resultNode = transformer.invoke(elem);
            elem.before(resultNode);
        } // for transformer
        elem.remove();
    } // for element

    // if the root element is one of the process targets, we can not apply
    // the left renderers to original element because it will be replaced by
    // a new element even it is not necessary (that is how Transformer
    // works).
    if (delayedElement == null) {
        apply(target, rendererList, renderAction, startIndex + 1, count);
    } else {
        // For a Document the first child element is transformed instead of the document.
        if (rendererType == RendererType.ELEMENT_NOT_FOUND_HANDLER && delayedElement instanceof Document) {
            delayedElement = delayedElement.child(0);
        }
        // The remaining renderers are applied to each transformer result, because the
        // original element is removed afterwards.
        for (Transformer<?> transformer : transformerList) {
            resultNode = transformer.invoke(delayedElement);
            delayedElement.before(resultNode);
            apply(resultNode, rendererList, renderAction, startIndex + 1, count);
        } // for transformer
        delayedElement.remove();
    }

}

From source file:com.jimplush.goose.ContentExtractor.java

/**
 * adds any siblings that may have a decent score to this node
 *
 * @param node/*from   w  ww.java2 s  . c o  m*/
 * @return
 */
private Element addSiblings(Element node) {
    if (logger.isDebugEnabled()) {
        logger.debug("Starting to add siblings");
    }
    int baselineScoreForSiblingParagraphs = getBaselineScoreForSiblings(node);

    Element currentSibling = node.previousElementSibling();
    while (currentSibling != null) {
        if (logger.isDebugEnabled()) {
            logger.debug("SIBLINGCHECK: " + debugNode(currentSibling));
        }

        if (currentSibling.tagName().equals("p")) {

            node.child(0).before(currentSibling.outerHtml());
            currentSibling = currentSibling.previousElementSibling();
            continue;
        }

        // check for a paraph embedded in a containing element
        int insertedSiblings = 0;
        Elements potentialParagraphs = currentSibling.getElementsByTag("p");
        if (potentialParagraphs.first() == null) {
            currentSibling = currentSibling.previousElementSibling();
            continue;
        }
        for (Element firstParagraph : potentialParagraphs) {
            WordStats wordStats = StopWords.getStopWordCount(firstParagraph.text());

            int paragraphScore = wordStats.getStopWordCount();

            if ((float) (baselineScoreForSiblingParagraphs * .30) < paragraphScore) {
                if (logger.isDebugEnabled()) {
                    logger.debug("This node looks like a good sibling, adding it");
                }
                node.child(insertedSiblings).before("<p>" + firstParagraph.text() + "<p>");
                insertedSiblings++;
            }

        }

        currentSibling = currentSibling.previousElementSibling();
    }
    return node;

}

From source file:com.liato.bankdroid.banking.banks.AbsIkanoPartner.java

/**
 * Logs in, then reloads accounts, the overall balance and the purchase transactions from the
 * "my account" page linked in the primary navigation.
 *
 * @throws LoginException if the username or password is missing or empty
 * @throws BankException if loading the account page fails with an I/O error, or if no
 *         accounts could be read
 * @throws BankChoiceException declared by the overridden method
 */
@Override
public void update() throws BankException, LoginException, BankChoiceException {
    super.update();
    boolean credentialsMissing = username == null || password == null || username.length() == 0
            || password.length() == 0;
    if (credentialsMissing) {
        throw new LoginException(res.getText(R.string.invalid_username_password).toString());
    }

    urlopen = login();
    Document page = Jsoup.parse(response);
    Element accountLink = page.select("#primary-nav > li:eq(1) > a").first();
    if (accountLink != null && accountLink.attr("href") != null) {
        String myAccountUrl = accountLink.attr("href");
        try {
            response = urlopen.open("https://partner.ikanobank.se/" + myAccountUrl);
            page = Jsoup.parse(response);

            // One account per information span that has a name, a balance and a currency.
            int nextAccountId = 0;
            for (Element row : page.select("#CustomerAccountInformationSpan > span > span")) {
                Element nameEl = row.select("> span > span:eq(0)").first();
                Element balanceEl = row.select("> span:eq(1)").first();
                Element currencyEl = row.select("> span:eq(2)").first();
                if (nameEl == null || balanceEl == null || currencyEl == null) {
                    continue;
                }
                Account account = new Account(nameEl.text().trim(), Helpers.parseBalance(balanceEl.text()),
                        Integer.toString(nextAccountId));
                account.setCurrency(Helpers.parseCurrency(currencyEl.text(), "SEK"));
                if (nextAccountId > 0) {
                    // Every account after the first is registered as an alias of account "0".
                    account.setAliasfor("0");
                }
                accounts.add(account);
                nextAccountId++;
            }
            if (accounts.isEmpty()) {
                throw new BankException(res.getText(R.string.no_accounts_found).toString());
            }
            // Use the amount from "Kvar att handla fr" which should be the
            // last account in the list.
            this.balance = accounts.get(accounts.size() - 1).getBalance();

            ArrayList<Transaction> transactions = new ArrayList<Transaction>();
            for (Element row : page
                    .select("#ShowCustomerTransactionPurchasesInformationDiv table tr:has(td)")) {
                if (row.childNodeSize() != 6) {
                    continue;
                }
                Transaction transaction = new Transaction(row.child(0).text().trim(),
                        row.child(1).text().trim(), Helpers.parseBalance(row.child(2).text()));
                transaction.setCurrency(Helpers.parseCurrency(row.child(3).text().trim(), "SEK"));
                transactions.add(transaction);
            }
            accounts.get(0).setTransactions(transactions);
        } catch (ClientProtocolException e) {
            throw new BankException(e.getMessage());
        } catch (IOException e) {
            throw new BankException(e.getMessage());
        }
    }
    if (accounts.isEmpty()) {
        throw new BankException(res.getText(R.string.no_accounts_found).toString());
    }
    super.updateComplete();
}

From source file:de.geeksfactory.opacclient.apis.SISIS.java

/**
 * Parses the rows of the lent-media table in {@code doc} and appends one {@link LentItem} per
 * row to {@code media}.
 *
 * <p>The first {@code .data tr} row is skipped. Parsing stops as soon as a row contains the
 * text "keine Daten". Title, author, deadline, home branch and renewal data are read from
 * the row; optional parts that are missing are simply left unset.
 *
 * @param media  list that receives the parsed items
 * @param doc    page to read; its base URI is set to {@code opac_url} so that
 *               {@code abs:href} lookups resolve
 * @param offset value that is prefixed to the prolong data of renewable items
 */
protected void parse_medialist(List<LentItem> media, Document doc, int offset) {
    Elements copytrs = doc.select(".data tr");
    doc.setBaseUri(opac_url);

    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);

    int trs = copytrs.size();
    if (trs == 1) {
        return;
    }
    assert (trs > 0);
    for (int i = 1; i < trs; i++) {
        Element tr = copytrs.get(i);
        LentItem item = new LentItem();

        if (tr.text().contains("keine Daten")) {
            return;
        }

        item.setTitle(tr.child(1).select("strong").text().trim());
        try {
            // The author is the part after the first <br> of the title cell. It may be absent;
            // in that case keep going so deadline and renewal data are still parsed.
            String[] col1split = tr.child(1).html().split("<br[ /]*>");
            if (col1split.length > 1) {
                item.setAuthor(col1split[1].trim());
            }

            String[] col2split = tr.child(2).html().split("<br[ /]*>");
            String deadline = col2split[0].trim();
            if (deadline.contains("-")) {
                // For a "from - to" range only the end date is used.
                deadline = deadline.split("-")[1].trim();
            }
            try {
                item.setDeadline(fmt.parseLocalDate(deadline).toString());
            } catch (IllegalArgumentException e1) {
                e1.printStackTrace();
            }

            if (col2split.length > 1) {
                item.setHomeBranch(col2split[1].trim());
            }

            Elements links = tr.select("a");
            if (links.size() > 0) {
                for (Element link : links) {
                    String href = link.attr("abs:href");
                    Map<String, String> hrefq = getQueryParamsFirst(href);
                    // Constant-first comparison: links without a methodToCall parameter must
                    // be skipped instead of aborting the scan with a NullPointerException.
                    if ("renewalPossible".equals(hrefq.get("methodToCall"))) {
                        item.setProlongData(offset + "$" + href.split("\\?")[1]);
                        item.setRenewable(true);
                        break;
                    }
                }
            } else {
                Elements notes = tr.select(".textrot, .textgruen, .textdunkelblau");
                if (notes.size() > 0) {
                    // No link at all: the coloured note text is stored as prolong data instead.
                    item.setProlongData(notes.text());
                    item.setRenewable(false);
                }
            }

        } catch (Exception ex) {
            ex.printStackTrace();
        }

        media.add(item);
    }
    assert (media.size() == trs - 1);

}

From source file:de.geeksfactory.opacclient.apis.SISIS.java

/**
 * Reads the reservation table of {@code doc} and appends one {@link ReservedItem} per data row
 * to {@code reservations}.
 *
 * <p>The first {@code .data tr} row is skipped. Processing ends at the first row that contains
 * "keine Daten" or that has exactly one child element.
 *
 * @param type         prefix written into the cancel data of each item
 * @param reservations list that receives the parsed items
 * @param doc          page to read; its base URI is set to {@code opac_url}
 * @param offset       value written into the cancel data after {@code type}
 */
protected void parse_reslist(String type, List<ReservedItem> reservations, Document doc, int offset) {
    Elements rows = doc.select(".data tr");
    doc.setBaseUri(opac_url);
    int rowCount = rows.size();
    if (rowCount == 1) {
        return;
    }
    assert (rowCount > 0);
    for (int rowIdx = 1; rowIdx < rowCount; rowIdx++) {
        Element row = rows.get(rowIdx);
        ReservedItem reserved = new ReservedItem();

        boolean noMoreData = row.text().contains("keine Daten") || row.children().size() == 1;
        if (noMoreData) {
            return;
        }

        reserved.setTitle(row.child(1).select("strong").text().trim());
        try {
            String[] titleParts = row.child(1).html().split("<br[ /]*>");
            String[] infoParts = row.child(2).html().split("<br[ /]*>");
            if (titleParts.length > 1) {
                reserved.setAuthor(titleParts[1].trim());
            }
            if (infoParts.length > 2) {
                // Branch and status are only taken when the cell has at least three parts.
                reserved.setBranch(infoParts[2].trim());
                reserved.setStatus(infoParts[0].trim());
            }

            Elements anchors = row.select("a");
            if (anchors.size() == 1) {
                // The query string of the single link identifies the reservation to cancel.
                String query = anchors.attr("abs:href").split("\\?")[1];
                reserved.setCancelData(type + "$" + offset + "$" + query);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }

        reservations.add(reserved);
    }
    assert (reservations.size() == rowCount - 1);
}

From source file:de.geeksfactory.opacclient.apis.SISIS.java

/**
 * Builds a {@link DetailledItem} from a detail page and its "title" and "availability" tabs.
 *
 * <p>{@code html} is the already loaded detail page. The two tabs are requested through
 * {@code httpGet} using the {@code showTitleActive} and {@code showAvailabilityActive} tab
 * URLs. From these three documents the method collects, in order: the item id, reservation
 * links, the cover, the title, the detail fields, download links, the copies table and the
 * volume-search parameters.
 *
 * @param html markup of the detail page
 * @return the populated item
 * @throws IOException declared for the tab requests (presumably raised by {@code httpGet})
 */
protected DetailledItem parse_result(String html) throws IOException {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);

    String html2 = httpGet(opac_url + "/singleHit.do?methodToCall=activateTab&tab=showTitleActive", ENCODING);

    Document doc2 = Jsoup.parse(html2);
    doc2.setBaseUri(opac_url);

    String html3 = httpGet(opac_url + "/singleHit.do?methodToCall=activateTab&tab=showAvailabilityActive",
            ENCODING);

    Document doc3 = Jsoup.parse(html3);
    doc3.setBaseUri(opac_url);

    DetailledItem result = new DetailledItem();

    try {
        result.setId(doc.select("#bibtip_id").text().trim());
    } catch (Exception ex) {
        ex.printStackTrace();
    }
    List<String> reservationlinks = new ArrayList<>();
    for (Element link : doc3.select("#vormerkung a, #tab-content a")) {
        String href = link.absUrl("href");
        Map<String, String> hrefq = getQueryParamsFirst(href);
        if (result.getId() == null) {
            // ID retrieval
            String key = hrefq.get("katkey");
            if (key != null) {
                result.setId(key);
                break;
            }
        }

        // Vormerken
        String methodToCall = hrefq.get("methodToCall");
        if ("doVormerkung".equals(methodToCall) || "doBestellung".equals(methodToCall)) {
            reservationlinks.add(href.split("\\?")[1]);
        }
    }
    if (reservationlinks.size() == 1) {
        result.setReservable(true);
        result.setReservation_info(reservationlinks.get(0));
    } else if (reservationlinks.size() == 0) {
        result.setReservable(false);
    } else {
        // TODO: Multiple options - handle this case!
    }

    Elements coverImages = doc.select(".data td img");
    if (coverImages.size() == 1) {
        result.setCover(coverImages.first().attr("abs:src"));
        try {
            downloadCover(result);
        } catch (Exception ignored) {
            // Deliberately ignored: a failed cover download must not abort detail parsing.
        }
    }

    if (doc.select(".aw_teaser_title").size() == 1) {
        result.setTitle(doc.select(".aw_teaser_title").first().text().trim());
    } else if (doc.select(".data td strong").size() > 0) {
        result.setTitle(doc.select(".data td strong").first().text().trim());
    } else {
        result.setTitle("");
    }
    if (doc.select(".aw_teaser_title_zusatz").size() > 0) {
        result.addDetail(new Detail("Titelzusatz", doc.select(".aw_teaser_title_zusatz").text().trim()));
    }

    String title = "";
    String text = "";
    boolean takeover = false;
    Element detailtrs = doc2.select(".box-container .data td").first();
    // first() yields null when the page has no such cell; skip the scan instead of failing.
    if (detailtrs != null) {
        for (Node node : detailtrs.childNodes()) {
            if (node instanceof Element) {
                Element el = (Element) node;
                if (el.tagName().equals("strong")) {
                    title = el.text().trim();
                    text = "";
                } else {
                    if (el.tagName().equals("a")
                            && (el.text().trim().contains("hier klicken") || title.equals("Link:"))) {
                        text = text + node.attr("href");
                        takeover = true;
                        break;
                    }
                }
            } else if (node instanceof TextNode) {
                text = text + ((TextNode) node).text();
            }
        }
    }
    if (!takeover) {
        // Only a found link is carried over into the detail scan below.
        text = "";
        title = "";
    }

    detailtrs = doc2.select("#tab-content .data td").first();
    if (detailtrs != null) {
        for (Node node : detailtrs.childNodes()) {
            if (node instanceof Element) {
                Element el = (Element) node;
                if (el.tagName().equals("strong")) {
                    // A new <strong> label starts the next field: flush the one collected so far.
                    if (!text.equals("") && !title.equals("")) {
                        result.addDetail(new Detail(title.trim(), text.trim()));
                        if (title.equals("Titel:")) {
                            result.setTitle(text.trim());
                        }
                        text = "";
                    }

                    title = el.text().trim();
                } else {
                    if (el.tagName().equals("a")
                            && (el.text().trim().contains("hier klicken")
                                    || title.equals("Link:"))) {
                        text = text + node.attr("href");
                    } else {
                        text = text + el.text();
                    }
                }
            } else if (node instanceof TextNode) {
                text = text + ((TextNode) node).text();
            }
        }
    } else {
        if (doc2.select("#tab-content .fulltitle tr").size() > 0) {
            Elements rows = doc2.select("#tab-content .fulltitle tr");
            for (Element tr : rows) {
                if (tr.children().size() == 2) {
                    Element valcell = tr.child(1);
                    String value = valcell.text().trim();
                    if (valcell.select("a").size() == 1) {
                        value = valcell.select("a").first().absUrl("href");
                    }
                    result.addDetail(new Detail(tr.child(0).text().trim(), value));
                }
            }
        } else {
            result.addDetail(new Detail(stringProvider.getString(StringProvider.ERROR),
                    stringProvider.getString(StringProvider.COULD_NOT_LOAD_DETAIL)));
        }
    }
    // Flush the last collected field (no trailing <strong> triggers it inside the loop).
    if (!text.equals("") && !title.equals("")) {
        result.addDetail(new Detail(title.trim(), text.trim()));
        if (title.equals("Titel:")) {
            result.setTitle(text.trim());
        }
    }
    for (Element link : doc3.select("#tab-content a")) {
        Map<String, String> hrefq = getQueryParamsFirst(link.absUrl("href"));
        if (result.getId() == null) {
            // ID retrieval
            String key = hrefq.get("katkey");
            if (key != null) {
                result.setId(key);
                break;
            }
        }
    }
    for (Element link : doc3.select(".box-container a")) {
        if (link.text().trim().equals("Download")) {
            result.addDetail(
                    new Detail(stringProvider.getString(StringProvider.DOWNLOAD), link.absUrl("href")));
        }
    }

    Map<String, Integer> copy_columnmap = new HashMap<>();
    // Default values
    copy_columnmap.put("barcode", 1);
    copy_columnmap.put("branch", 3);
    copy_columnmap.put("status", 4);
    // Header cells, when present, override the default column positions.
    Elements copy_columns = doc.select("#tab-content .data tr#bg2 th");
    for (int i = 0; i < copy_columns.size(); i++) {
        Element th = copy_columns.get(i);
        String head = th.text().trim();
        if (head.contains("Status")) {
            copy_columnmap.put("status", i);
        }
        if (head.contains("Zweigstelle")) {
            copy_columnmap.put("branch", i);
        }
        if (head.contains("Mediennummer")) {
            copy_columnmap.put("barcode", i);
        }
        if (head.contains("Standort")) {
            copy_columnmap.put("location", i);
        }
        if (head.contains("Signatur")) {
            copy_columnmap.put("signature", i);
        }
    }

    Pattern status_lent = Pattern.compile(
            "^(entliehen) bis ([0-9]{1,2}.[0-9]{1,2}.[0-9]{2," + "4}) \\(gesamte Vormerkungen: ([0-9]+)\\)$");
    Pattern status_and_barcode = Pattern.compile("^(.*) ([0-9A-Za-z]+)$");

    Elements exemplartrs = doc.select("#tab-content .data tr").not("#bg2");
    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
    for (Element tr : exemplartrs) {
        try {
            Copy copy = new Copy();
            Element status = tr.child(copy_columnmap.get("status"));
            Element barcode = tr.child(copy_columnmap.get("barcode"));
            String barcodetext = barcode.text().trim().replace(" Wegweiser", "");

            // STATUS
            String statustext;
            if (status.getElementsByTag("b").size() > 0) {
                statustext = status.getElementsByTag("b").text().trim();
            } else {
                statustext = status.text().trim();
            }
            if (copy_columnmap.get("status").equals(copy_columnmap.get("barcode"))) {
                // Status and barcode share one column: split "<status> <barcode>".
                Matcher matcher1 = status_and_barcode.matcher(statustext);
                if (matcher1.matches()) {
                    statustext = matcher1.group(1);
                    barcodetext = matcher1.group(2);
                }
            }

            Matcher matcher = status_lent.matcher(statustext);
            if (matcher.matches()) {
                copy.setStatus(matcher.group(1));
                copy.setReservations(matcher.group(3));
                copy.setReturnDate(fmt.parseLocalDate(matcher.group(2)));
            } else {
                copy.setStatus(statustext);
            }
            copy.setBarcode(barcodetext);
            if (status.select("a[href*=doVormerkung]").size() == 1) {
                copy.setResInfo(status.select("a[href*=doVormerkung]").attr("href").split("\\?")[1]);
            }

            String branchtext = tr.child(copy_columnmap.get("branch")).text().trim().replace(" Wegweiser", "");
            copy.setBranch(branchtext);

            if (copy_columnmap.containsKey("location")) {
                copy.setLocation(
                        tr.child(copy_columnmap.get("location")).text().trim().replace(" Wegweiser", ""));
            }

            if (copy_columnmap.containsKey("signature")) {
                copy.setShelfmark(
                        tr.child(copy_columnmap.get("signature")).text().trim().replace(" Wegweiser", ""));
            }

            result.addCopy(copy);
        } catch (Exception ex) {
            // A row that cannot be parsed is reported and skipped; other rows are still added.
            ex.printStackTrace();
        }
    }

    try {
        Element isvolume = null;
        Map<String, String> volume = new HashMap<>();
        Elements links = doc.select(".data td a");
        int elcount = links.size();
        for (int eli = 0; eli < elcount; eli++) {
            List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(links.get(eli).attr("href")), "UTF-8");
            for (NameValuePair nv : anyurl) {
                if (nv.getName().equals("methodToCall") && nv.getValue().equals("volumeSearch")) {
                    isvolume = links.get(eli);
                } else if (nv.getName().equals("catKey")) {
                    volume.put("catKey", nv.getValue());
                } else if (nv.getName().equals("dbIdentifier")) {
                    volume.put("dbIdentifier", nv.getValue());
                }
            }
            if (isvolume != null) {
                volume.put("volume", "true");
                result.setVolumesearch(volume);
                break;
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }

    return result;
}