Example usage for org.jsoup.nodes Element children

List of usage examples for org.jsoup.nodes Element children

Introduction

This page collects example usages of the children() method of org.jsoup.nodes.Element.

Prototype

public Elements children() 

Source Link

Documentation

Get this element's child elements.

Usage

From source file:net.slkdev.swagger.confluence.service.impl.XHtmlToConfluenceServiceImpl.java

/**
 * Builds the title-to-link map from the table of contents of the current Swagger document.
 *
 * <p>Category links are the first child element of every entry directly below the first
 * {@code .sectlevel1} element inside {@code .toc}; they are registered with
 * {@link PageType#CATEGORY} and no category number. The links of each {@code .sectlevel2}
 * element are then registered as individual pages, numbered 1, 2, 3, ... in document order
 * of the {@code .sectlevel2} elements.
 *
 * @return the populated map of titles to Confluence links
 */
private Map<String, ConfluenceLink> buildTableOfContentsLinkMap() {
    final Elements toc = SWAGGER_DOCUMENT.get().select(".toc");

    // Collect the leading child of each top-level TOC entry.
    final Elements categoryLinks = new Elements();
    for (final Element categoryEntry : toc.select(".sectlevel1").first().children()) {
        categoryLinks.add(categoryEntry.children().first());
    }

    final Elements individualLists = toc.select(".sectlevel2");

    final Map<String, ConfluenceLink> linksByTitle = new HashMap<>();
    addLinksByType(linksByTitle, categoryLinks, PageType.CATEGORY, null);

    // The n-th ".sectlevel2" list is registered under category number n (1-based).
    int categoryNumber = 0;
    for (final Element individualList : individualLists) {
        categoryNumber++;
        addLinksByType(linksByTitle, individualList.select("a"), INDIVIDUAL, categoryNumber);
    }

    return linksByTitle;
}

From source file:me.vertretungsplan.parser.IndiwareParser.java

/**
 * Parses one day of an Indiware substitution schedule.
 *
 * <p>The day's date is taken from the title text, the last-change timestamp from the
 * "datum" text, header infos and footer lines become messages, and every "aktion" element
 * becomes one {@link Substitution}.
 *
 * @param doc  root element of the schedule to parse
 * @param html {@code true} to read {@code doc} through an {@code HTMLDataSource} (column
 *             meaning is derived from the header cells' class names), {@code false} to read
 *             it through an {@code XMLDataSource} (column meaning is the child's tag name)
 * @return the parsed day
 * @throws IOException if no date can be found in the title text
 */
SubstitutionScheduleDay parseIndiwareDay(Element doc, boolean html) throws IOException {
    SubstitutionScheduleDay day = new SubstitutionScheduleDay();

    DataSource ds;
    if (html) {
        ds = new HTMLDataSource(doc);
    } else {
        ds = new XMLDataSource(doc);
    }

    Matcher matcher = datePattern.matcher(ds.titel().text());
    if (!matcher.find())
        throw new IOException("malformed date: " + ds.titel().text());
    String date = matcher.group();
    day.setDate(
            DateTimeFormat.forPattern("EEEE, dd. MMMM yyyy").withLocale(Locale.GERMAN).parseLocalDate(date));

    String lastChange = ds.datum().text();
    day.setLastChange(DateTimeFormat.forPattern("dd.MM.yyyy, HH:mm").withLocale(Locale.GERMAN)
            .parseLocalDateTime(lastChange));

    // Each header info becomes one message of the form "<b>title</b> text".
    if (ds.kopfinfos().size() > 0) {
        for (Element kopfinfo : ds.kopfinfos()) {
            String title = html ? kopfinfo.select("th").text() : kopfinfoTitle(kopfinfo.tagName()) + ":";

            StringBuilder message = new StringBuilder();
            if (title != null && !title.isEmpty()) {
                message.append("<b>").append(title).append("</b>").append(" ");
            }
            message.append(html ? kopfinfo.select("td").text() : kopfinfo.text());

            day.addMessage(message.toString());
        }
    }

    // All footer lines are joined with newlines into a single message.
    if (ds.fuss() != null) {
        StringBuilder message = new StringBuilder();
        boolean first = true;
        for (Element fusszeile : ds.fusszeilen()) {
            if (first) {
                first = false;
            } else {
                message.append("\n");
            }
            message.append(fusszeile.text());
        }
        day.addMessage(message.toString());
    }

    // In HTML mode the column type is the header cell's class name without its
    // "thplan"/"thlplan" part; in XML mode columnTypes stays null.
    List<String> columnTypes = null;
    if (html) {
        columnTypes = new ArrayList<>();
        for (Element th : ((HTMLDataSource) ds).headers()) {
            columnTypes.add(th.className().replace("thplan", "").replace("thlplan", ""));
        }
    }

    for (Element aktion : ds.aktionen()) {
        Substitution substitution = new Substitution();
        String type = "Vertretung";
        String course = null;
        int i = 0; // index of the current column, used to look up columnTypes in HTML mode
        for (Element info : aktion.children()) {
            String value = info.text().replace("\u00a0", "");
            if (value.equals("---")) {
                // Placeholder cell: nothing to store, but the column index still advances.
                i++;
                continue;
            }
            final String columnType = html ? columnTypes.get(i) : info.tagName();
            switch (columnType) {
            case "klasse":
                Set<String> classes = new HashSet<>();
                for (String klasse : value.split(",")) {
                    Matcher courseMatcher = coursePattern.matcher(klasse);
                    if (courseMatcher.matches()) {
                        classes.add(courseMatcher.group(1));
                        course = courseMatcher.group(2);
                    } else {
                        classes.add(klasse);
                    }
                }
                substitution.setClasses(classes);
                break;
            case "stunde":
                substitution.setLesson(value);
                break;
            case "fach":
                // When a separate "vfach" column exists, "fach" is stored as the previous subject.
                String subject = subjectAndCourse(course, value);
                if (columnTypes != null && columnTypes.contains("vfach")) {
                    substitution.setPreviousSubject(subject);
                } else {
                    substitution.setSubject(subject);
                }
                break;
            case "vfach":
                substitution.setSubject(subjectAndCourse(course, value));
                // Fix: without this break the subject text fell through into the
                // "lehrer" case and was also stored as the teacher.
                break;
            case "lehrer":
                Matcher bracesMatcher = bracesPattern.matcher(value);
                if (bracesMatcher.matches())
                    value = bracesMatcher.group(1);
                substitution.setTeacher(value);
                break;
            case "raum":
                // When a separate "vraum" column exists, "raum" is stored as the previous room.
                if (columnTypes != null && columnTypes.contains("vraum")) {
                    substitution.setPreviousRoom(value);
                } else {
                    substitution.setRoom(value);
                }
                break;
            case "vraum":
                substitution.setRoom(value);
                // Fix: without this break the room text fell through into the "info"
                // case and could overwrite the type or description.
                break;
            case "info":
                Matcher substitutionMatcher = substitutionPattern.matcher(value);
                Matcher cancelMatcher = cancelPattern.matcher(value);
                Matcher delayMatcher = delayPattern.matcher(value);
                Matcher selfMatcher = selfPattern.matcher(value);
                if (substitutionMatcher.matches()) {
                    substitution.setPreviousSubject(substitutionMatcher.group(1));
                    substitution.setPreviousTeacher(substitutionMatcher.group(2));
                    if (!substitutionMatcher.group(3).isEmpty()) {
                        substitution.setDesc(substitutionMatcher.group(3));
                    }
                } else if (cancelMatcher.matches()) {
                    type = "Entfall";
                    substitution.setPreviousSubject(cancelMatcher.group(1));
                    substitution.setPreviousTeacher(cancelMatcher.group(2));
                } else if (delayMatcher.matches()) {
                    type = "Verlegung";
                    substitution.setPreviousSubject(delayMatcher.group(1));
                    substitution.setPreviousTeacher(delayMatcher.group(2));
                    substitution.setDesc(delayMatcher.group(3));
                } else if (selfMatcher.matches()) {
                    type = "selbst.";
                    if (!selfMatcher.group(1).isEmpty())
                        substitution.setDesc(selfMatcher.group(1));
                // NOTE(review): "fllt aus" looks like "fällt aus" with the umlaut lost in
                // transit; confirm against the upstream source before relying on it.
                } else if (value.equals("fllt aus") || value.equals("Klausur") || value.equals("Aufg.")) {
                    type = value;
                } else {
                    substitution.setDesc(value);
                }
                break;
            }
            i++;
        }
        substitution.setType(type);
        substitution.setColor(colorProvider.getColor(substitution.getType()));
        // Fall back to the course name when no subject column supplied a subject.
        if (course != null && substitution.getSubject() == null) {
            substitution.setSubject(course);
        }
        day.addSubstitution(substitution);
    }

    return day;
}

From source file:us.colloquy.index.IndexHandler.java

/**
 * Scans {@code letterDirectory} (up to 6 levels deep) for files whose path ends in
 * ".ncx", parses each one with jsoup, and adds a {@code DocumentPointer} to
 * {@code uriList} for every matching navigation entry.
 *
 * <p>A pointer consists of the .ncx file's parent directory joined with the entry's
 * {@code src} value (any "#..." fragment removed) and the document title read from the
 * file's {@code docTitle} element.
 *
 * <p>Errors are not propagated: an {@link IOException} while searching and any exception
 * while parsing are printed via {@code printStackTrace()}. Because the parsing loop sits
 * inside a single try block, the first exception ends processing of all remaining files.
 *
 * <p>NOTE(review): the regular expressions containing '?' below look like they lost
 * non-ASCII characters (each replaced by '?') somewhere before this text was captured.
 * As written they do not look like valid patterns, so {@code String.matches} would throw
 * and end up in the catch-all handler. Restore them from the original source.
 *
 * @param uriList         receives the document pointers that are found
 * @param letterDirectory root directory to search
 * @param useOnlyNumber   when {@code true}, labels that merely contain a digit are
 *                        accepted as well (second branch below)
 */
public void getURIForAllLetters(Set<DocumentPointer> uriList, String letterDirectory, boolean useOnlyNumber) {
    ///Documents/Tolstoy/diaries

    Path pathToLetters = FileSystems.getDefault().getPath(letterDirectory);

    List<Path> results = new ArrayList<>();

    int maxDepth = 6;

    // Collect every path ending in ".ncx"; the stream is closed by try-with-resources.
    try (Stream<Path> stream = Files.find(pathToLetters, maxDepth, (path, attr) -> {
        return String.valueOf(path).endsWith(".ncx");
    })) {

        stream.forEach(results::add);

        //            String joined = stream
        //                    .sorted()
        //                    .map(String::valueOf)
        //                    .collect(Collectors.joining("; "));
        //
        //            System.out.println("\nFound: " + joined);

    } catch (IOException e) {
        e.printStackTrace();
    }

    System.out.println("files: " + results.size());

    try {

        for (Path res : results) {
            // src values are joined onto the directory that contains the .ncx file.
            Path parent = res.getParent();

            //                System.out.println("---------------------------------------------");
            //                System.out.println(parent.toString());
            //use jsoup to list all files that contain something useful
            Document doc = Jsoup.parse(res.toFile(), "UTF-8");

            String title = "";

            // The title ends up as the text of the last child of the last docTitle element.
            for (Element element : doc.getElementsByTag("docTitle")) {
                //Letter letter = new Letter();

                // StringBuilder content = new StringBuilder();

                for (Element child : element.children()) {
                    title = child.text();
                    // System.out.println("Title: " + title);
                }
            }

            for (Element element : doc.getElementsByTag("navPoint")) {
                //Letter letter = new Letter();

                // StringBuilder content = new StringBuilder();

                for (Element child : element.children()) {
                    String label = child.text();

                    if (StringUtils.isNotEmpty(label)) {
                        // NOTE(review): "?" is presumably a garbled literal (see method comment);
                        // on a match this only prints a separator line.
                        if (label.matches("?")) {
                            System.out.println("------------------");
                        }

                        // src attribute taken from the "content" elements found under this child.
                        String url = child.getElementsByTag("content").attr("src");

                        // NOTE(review): the "[?--?]" character class is presumably garbled as well.
                        if (label.matches(".*\\d{1,3}.*[?--?]+.*") && StringUtils.isNotEmpty(url)) {
                            DocumentPointer documentPointer = new DocumentPointer(
                                    parent.toString() + File.separator + url.replaceAll("#.*", ""), title);

                            uriList.add(documentPointer);
                            //                                System.out.println("nav point: " + label + " src " + parent.toString()
                            //                                        + System.lineSeparator() + url.replaceAll("#.*",""));

                        // Looser rule, only active when useOnlyNumber is set: any label containing a digit.
                        } else if (label.matches(".*\\d{1,3}.*") && StringUtils.isNotEmpty(url)
                                && useOnlyNumber) {
                            DocumentPointer documentPointer = new DocumentPointer(
                                    parent.toString() + File.separator + url.replaceAll("#.*", ""), title);

                            uriList.add(documentPointer);
                            //                                System.out.println("nav point: " + label + " src " + parent.toString()
                            //                                        + System.lineSeparator() + url.replaceAll("#.*",""));

                        } else {
                            // System.out.println("nav point: " + label + " src " + child.getElementsByTag("content").attr("src"));
                        }

                    }
                }
            }

        }
    } catch (Exception e) {
        e.printStackTrace();
    }

    //        System.out.println("Size: " + uriList.size());

    //        for (DocumentPointer pointer : uriList)
    //        {
    //            //parse and
    //            System.out.println(pointer.getSourse() + "\t" + pointer.getUri());
    //        }
}

From source file:us.colloquy.index.IndexHandler.java

/**
 * Scans {@code pathToLetters} (up to 6 levels deep) for files whose path ends in ".ncx",
 * parses each one with jsoup, and appends a {@code DocumentPointer} to
 * {@code documentPointers} for every {@code navPoint} that lies inside the "printing"
 * window of its file.
 *
 * <p>Per file, the window opens at the first nav label matching the start pattern (only
 * once, guarded by {@code newFile}) and is closed again by the end-marker check. While it
 * is open, every nav point whose label does not match the exclusion pattern yields a
 * pointer built from the .ncx file's parent directory, the entry's {@code src} value (any
 * "#..." fragment removed) and the document title.
 *
 * <p>Errors are not propagated: exceptions are printed via {@code printStackTrace()}, and
 * an exception during parsing ends processing of all remaining files.
 *
 * <p>NOTE(review): the regular expressions / literals containing '?' below look like they
 * lost non-ASCII characters (each replaced by '?') before this text was captured. As
 * written they do not look like valid patterns. Restore them from the original source.
 *
 * @param documentPointers receives the document pointers that are found
 * @param pathToLetters    root directory to search
 */
public void getURIForAllDiaries(List<DocumentPointer> documentPointers, Path pathToLetters) {
    List<Path> results = new ArrayList<>();

    int maxDepth = 6;

    // Collect every path ending in ".ncx"; the stream is closed by try-with-resources.
    try (Stream<Path> stream = Files.find(pathToLetters, maxDepth, (path, attr) -> {
        return String.valueOf(path).endsWith(".ncx");
    })) {

        stream.forEach(results::add);

    } catch (IOException e) {
        e.printStackTrace();
    }

    System.out.println("files: " + results.size());

    try {

        for (Path res : results) {
            // src values are joined onto the directory that contains the .ncx file.
            Path parent = res.getParent();

            //                System.out.println("---------------------------------------------");
            //                System.out.println(parent.toString());
            //use jsoup to list all files that contain something useful
            Document doc = Jsoup.parse(res.toFile(), "UTF-8");

            String title = "";

            // The title ends up as the text of the last child of the last docTitle element.
            for (Element element : doc.getElementsByTag("docTitle")) {
                //Letter letter = new Letter();

                // StringBuilder content = new StringBuilder();

                for (Element child : element.children()) {
                    title = child.text();
                    // System.out.println("Title: " + title);
                }
            }

            //  System.out.println("==========================   " + res.toString() + " ==========================");

            // true while nav points should be turned into document pointers
            boolean startPrinting = false;

            // true until the start pattern has matched once in this file
            boolean newFile = true;

            for (Element element : doc.getElementsByTag("navPoint")) {

                //get nav label and content

                Element navLabelElement = element.select("navLabel").first();
                Element srsElement = element.select("content").first();

                String navLabel = "";
                String srs = "";

                if (navLabelElement != null) {
                    // Asterisks are stripped from the label before it is compared.
                    navLabel = navLabelElement.text().replaceAll("\\*", "").trim();
                }

                if (srsElement != null) {
                    srs = srsElement.attr("src");
                }

                // End marker: closes the window.
                // NOTE(review): here navLabel is passed as the regular expression and the
                // literal is the input, the reverse of the other checks; confirm this is
                // intended. The literal itself is presumably garbled.
                if ("??".matches(navLabel))

                {
                    startPrinting = false;

                    // System.out.println("----------------- end of file pointer ---------------");
                }

                // Start marker: opens the window, at most once per file.
                // NOTE(review): pattern presumably garbled (see method comment).
                if (StringUtils.isNotEmpty(navLabel)
                        && navLabel.matches("??.*|?? ?.*") && newFile) {
                    newFile = false;
                    startPrinting = true;
                }

                // Inside the window, skip labels matching the exclusion pattern.
                // NOTE(review): pattern presumably garbled (see method comment).
                if (startPrinting && !navLabel
                        .matches("(|??? ??)")) {
                    // System.out.println("----------------- file pointer ---------------");
                    //   System.out.println(navLabel + "\t" + srs);

                    DocumentPointer documentPointer = new DocumentPointer(
                            parent.toString() + File.separator + srs.replaceAll("#.*", ""), title);

                    documentPointers.add(documentPointer);
                }

            }

            //   System.out.println("==========================   END OF FILE ==========================");

        }
    } catch (Exception e) {
        e.printStackTrace();
    }

    System.out.println("Size: " + documentPointers.size());

    //  for (DocumentPointer pointer : documentPointers)
    // {
    //parse and
    //     System.out.println(pointer.getSourse() + "\t" + pointer.getUri());
}

From source file:de.geeksfactory.opacclient.apis.Heidi.java

/**
 * Requests renewal of the whole account and parses the per-item result table.
 *
 * <p>If the response contains a password input, the method logs in and calls itself again;
 * a failed login is reported as an {@code ERROR} result carrying the exception message.
 * Otherwise the rows of {@code .kontobox table tbody tr} are read: rows with at least two
 * cells contribute author, title and old/new return date to the current entry, and a row
 * with fewer than two cells finishes the current (non-empty) entry with its text as the
 * line message.
 *
 * @param account    account to log in with when the session has expired
 * @param useraction passed through unchanged on the retry
 * @param selection  passed through unchanged on the retry
 * @return {@code OK} with the parsed lines, or {@code ERROR} if the login failed
 * @throws IOException if the HTTP request fails
 */
@Override
public ProlongAllResult prolongAll(Account account, int useraction, String selection) throws IOException {
    // NOTE(review): the "verl" value looks like it lost an umlaut in transit; kept as found.
    final String url = opac_url + "/konto.cgi?sess=" + sessid + "&email=&verl=Gesamtkontoverlngerung";
    final Document page = Jsoup.parse(httpGet(url, ENCODING));

    // A password field means we were sent to the login form: log in and retry.
    if (!page.select("input[name=pw]").isEmpty()) {
        try {
            login(account);
        } catch (OpacErrorException e) {
            return new ProlongAllResult(MultiStepResult.Status.ERROR, e.getMessage());
        }
        return prolongAll(account, useraction, selection);
    }

    final List<Map<String, String>> entries = new ArrayList<>();
    Map<String, String> current = new HashMap<>();

    for (Element row : page.select(".kontobox table tbody tr")) {
        if (row.children().size() >= 2) {
            // Label/value row: store the value under the key matching its label.
            final String label = row.child(0).text();
            final String value = row.child(1).text().trim();
            if (label.contains("Verfasser")) {
                current.put(ProlongAllResult.KEY_LINE_AUTHOR, value);
            } else if (label.contains("Titel")) {
                current.put(ProlongAllResult.KEY_LINE_TITLE, value);
            } else if (label.contains("Altes Leihfristende")) {
                current.put(ProlongAllResult.KEY_LINE_OLD_RETURNDATE, value);
            } else if (label.contains("Neues")) {
                current.put(ProlongAllResult.KEY_LINE_NEW_RETURNDATE, value);
            }
            continue;
        }

        // Short row: its text is the message that closes the entry collected so far.
        if (!current.isEmpty()) {
            current.put(ProlongAllResult.KEY_LINE_MESSAGE, row.child(0).text().trim());
            entries.add(current);
            current = new HashMap<>();
        }
    }

    return new ProlongAllResult(MultiStepResult.Status.OK, entries);
}

From source file:de.geeksfactory.opacclient.apis.Zones22.java

/**
 * Parses a detail page into a {@link DetailledItem}.
 *
 * <p>Steps, in order: label/value rows of the detail table (the first "Titel" row sets the
 * title), "Vormerken" action links (reservation info), the text of every
 * {@code div.record-item-new} block, key/value pairs from a {@code span.z3988} title
 * attribute if present, and finally the copy information from the {@code stock_*} divs.
 *
 * @param id   identifier stored on the result
 * @param html HTML of the detail page
 * @return the populated item
 * @throws IOException declared by the signature; not thrown by the code visible here
 */
private DetailledItem parse_result(String id, String html) throws IOException {
    final Document doc = Jsoup.parse(html);

    final DetailledItem result = new DetailledItem();
    result.setTitle("");
    result.setId(id);

    // --- label/value rows -------------------------------------------------
    boolean titleSet = false;
    for (Element row : doc.select(".DetailDataCell table table:not(.inRecordHeader) tr")) {
        final int cellCount = row.children().size();
        final String label = row.child(0).text().trim();
        if (label.equals("Titel") && !titleSet) {
            result.setTitle(row.child(cellCount - 1).text().trim());
            titleSet = true;
        } else if (cellCount > 1) {
            final Element valueCell = row.child(cellCount - 1);
            // Cells that contain a nested table are not added as details.
            if (valueCell.select("table").isEmpty()) {
                final String value = valueCell.text().trim();
                if (value.length() > 0) {
                    result.addDetail(new Detail(label, value));
                }
            }
        }
    }

    // --- reservation link -------------------------------------------------
    for (Element link : doc.select("a.SummaryActionLink")) {
        if (link.text().contains("Vormerken")) {
            result.setReservable(true);
            result.setReservation_info(link.attr("href"));
        }
    }

    // --- free-text blocks: text nodes verbatim, <br> as newline, other elements trimmed ---
    for (Element block : doc.select("div.record-item-new")) {
        final StringBuilder text = new StringBuilder();
        for (Node node : block.childNodes()) {
            if (node instanceof TextNode) {
                text.append(((TextNode) node).text());
            } else if (node instanceof Element) {
                final Element child = (Element) node;
                if (child.tagName().equals("br")) {
                    text.append("\n");
                } else {
                    text.append(child.text().trim());
                }
            }
        }
        result.addDetail(new Detail("", text.toString()));
    }

    // --- span.z3988: '&'-separated key=value pairs in the title attribute ---
    final Elements z3988 = doc.select("span.z3988");
    if (z3988.size() > 0) {
        final String data = z3988.first().attr("title").trim();
        for (String pair : data.split("\\&")) {
            final String[] nv = pair.split("=", 2);
            if (nv.length != 2 || nv[1].trim().equals("")) {
                continue;
            }
            final String key = nv[0];
            if ((key.equals("rft.btitle") || key.equals("rft.atitle")) && result.getTitle().length() == 0) {
                // Only used as a fallback while the title is still empty.
                result.setTitle(nv[1]);
            } else if (key.equals("rft.au")) {
                result.addDetail(new Detail("Author", nv[1]));
            }
        }
    }

    // --- copies -----------------------------------------------------------
    String branch = "";
    for (Element stockDiv : doc.select(".DetailDataCell div[id^=stock_]")) {
        if (stockDiv.attr("id").startsWith("stock_head")) {
            // Heading div: remember its text as the branch for the divs that follow.
            branch = stockDiv.text().trim();
            continue;
        }

        final Map<String, String> copy = new HashMap<String, String>();

        // This is getting very ugly - check if it is valid for libraries
        // which are not
        // Hamburg.
        // "position" counts element and text nodes since the last <br>; the meaning of a
        // node is derived from that position.
        int position = 0;
        for (Node node : stockDiv.childNodes()) {
            try {
                if (node instanceof Element) {
                    final Element el = (Element) node;
                    final String tagName = el.tag().getName();
                    if (tagName.equals("br")) {
                        copy.put(DetailledItem.KEY_COPY_BRANCH, branch);
                        result.addCopy(copy);
                        position = -1; // becomes 0 after the increment below
                    } else if (tagName.equals("b")) {
                        if (position == 1) {
                            copy.put(DetailledItem.KEY_COPY_LOCATION, el.text());
                        } else if (position > 1) {
                            copy.put(DetailledItem.KEY_COPY_STATUS, el.text());
                        }
                    }
                    position++;
                } else if (node instanceof TextNode) {
                    final TextNode textNode = (TextNode) node;
                    switch (position) {
                    case 0:
                        copy.put(DetailledItem.KEY_COPY_DEPARTMENT, textNode.text());
                        break;
                    case 2:
                        copy.put(DetailledItem.KEY_COPY_BARCODE,
                                textNode.getWholeText().trim().split("\n")[0].trim());
                        break;
                    case 6:
                        // The return date is taken as the last 10 characters of the text.
                        final String text = textNode.text().trim();
                        copy.put(DetailledItem.KEY_COPY_RETURN, text.substring(text.length() - 10));
                        break;
                    default:
                        break;
                    }
                    position++;
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    return result;
}

From source file:com.jimplush.goose.ContentExtractor.java

/**
 * Collects the non-empty texts of all elements under {@code node} that match
 * {@code A_REL_TAG_SELECTOR}.
 *
 * @param node element to search below
 * @return the set of texts, or {@code NO_STRINGS} when {@code node} has no child elements
 *         or nothing matches the selector
 */
private Set<String> extractTags(Element node) {
    if (node.children().isEmpty()) {
        return NO_STRINGS;
    }

    final Elements matches = Selector.select(A_REL_TAG_SELECTOR, node);
    if (matches.isEmpty()) {
        return NO_STRINGS;
    }

    final Set<String> result = new HashSet<String>(matches.size());
    for (Element match : matches) {
        final String text = match.text();
        if (string.isNullOrEmpty(text)) {
            continue;
        }
        result.add(text);
    }
    return result;
}

From source file:de.geeksfactory.opacclient.apis.IOpac.java

/**
 * Builds the list of search fields offered by this catalogue.
 *
 * <p>Regular fields are read from the table rows of the search form
 * ("search_expert.htm", falling back to "iopacie.htm" when that page is not reachable).
 * If no field is found but an element named "sleStichwort" exists, a single free-text
 * field is created instead. Media types are then read from "mtyp.js" (with fallbacks) and
 * added as a dropdown field when at least one value was found.
 *
 * @return the search fields that were found
 * @throws IOException if the search form cannot be loaded
 */
@Override
public List<SearchField> getSearchFields() throws IOException {
    final List<SearchField> fields = new ArrayList<>();

    // Extract all search fields, except media types
    String html;
    try {
        html = httpGet(opac_url + dir + "/search_expert.htm", getDefaultEncoding());
    } catch (NotReachableException e) {
        html = httpGet(opac_url + dir + "/iopacie.htm", getDefaultEncoding());
    }
    Document doc = Jsoup.parse(html);

    final Elements rows = doc
            .select("form tr:has(input:not([type=submit], [type=reset])), form tr:has(select)");
    for (Element row : rows) {
        final Elements cells = row.children();
        final int cellCount = cells.size();

        // Four cells hold two label/input pairs side by side; two cells (or three with an
        // empty third cell) hold a single pair. Any other layout is ignored.
        final int pairCount;
        if (cellCount == 4) {
            pairCount = 2;
        } else if (cellCount == 2 || (cellCount == 3 && cells.get(2).children().size() == 0)) {
            pairCount = 1;
        } else {
            pairCount = 0;
        }

        for (int pair = 0; pair < pairCount; pair++) {
            final SearchField field = createSearchField(cells.get(2 * pair), cells.get(2 * pair + 1));
            if (field != null) {
                fields.add(field);
            }
        }
    }

    // Fallback: a lone keyword input becomes a free-search text field.
    if (fields.size() == 0 && doc.select("[name=sleStichwort]").size() > 0) {
        final Element keywordInput = doc.select("input[name=sleStichwort]").first();
        final TextSearchField freeSearch = new TextSearchField();
        freeSearch.setDisplayName(stringProvider.getString(StringProvider.FREE_SEARCH));
        freeSearch.setId(keywordInput.attr("name"));
        freeSearch.setHint("");
        fields.add(freeSearch);
    }

    // Extract available media types.
    // We have to parse JavaScript. Doing this with RegEx is evil.
    // But not as evil as including a JavaScript VM into the app.
    // And I honestly do not see another way.
    final Pattern keyPattern = Pattern.compile("mtyp\\[[0-9]+\\]\\[\"typ\"\\] = \"([^\"]+)\";");
    final Pattern valuePattern = Pattern.compile("mtyp\\[[0-9]+\\]\\[\"bez\"\\] = \"([^\"]+)\";");

    final DropdownSearchField mediaTypes = new DropdownSearchField();
    try {
        try {
            html = httpGet(opac_url + dir + "/mtyp.js", getDefaultEncoding());
        } catch (NotReachableException e) {
            html = httpGet(opac_url + "/mtyp.js", getDefaultEncoding());
        }

        // Each "new Array();" starts one media type entry in the script.
        for (String chunk : html.split("new Array\\(\\);")) {
            final Matcher valueMatcher = valuePattern.matcher(chunk);
            if (!valueMatcher.find()) {
                // No display name in this chunk, nothing to add.
                continue;
            }
            final Matcher keyMatcher = keyPattern.matcher(chunk);
            final String key = keyMatcher.find() ? keyMatcher.group(1) : "";
            mediaTypes.addDropdownValue(key, valueMatcher.group(1));
        }
    } catch (IOException e) {
        // Script not available: fall back to the options of the search form's dropdown.
        try {
            html = httpGet(opac_url + dir + "/frames/search_form.php?bReset=1?bReset=1", getDefaultEncoding());
            doc = Jsoup.parse(html);

            for (Element option : doc.select("#imtyp option")) {
                mediaTypes.addDropdownValue(option.attr("value"), option.text());
            }
        } catch (IOException e1) {
            e1.printStackTrace();
        }
    }

    if (mediaTypes.getDropdownValues() != null && !mediaTypes.getDropdownValues().isEmpty()) {
        mediaTypes.setDisplayName("Medientypen");
        mediaTypes.setId("Medientyp");
        fields.add(mediaTypes);
    }
    return fields;
}

From source file:org.shareok.data.sagedata.SageSourceDataHandlerImpl.java

/**
 * Reads the author names from the first {@code div.authors} element of a full-text page.
 *
 * <p>For every {@code span.contribDegrees} inside that div, the text of the span's first
 * child element is taken as an author name; empty names are skipped.
 *
 * @param doc parsed full-text document
 * @return the author names in document order, or {@code null} when none were found or any
 *         exception occurred while reading them (the exception is logged)
 * @throws NoHtmlComponentsFoundException declared by the signature; not thrown by the code
 *         visible here
 */
private String[] getArticleAuthorsFromFullTextDoc(Document doc) throws NoHtmlComponentsFoundException {
    final List<String> names = new ArrayList<>();

    try {
        final Element authorsDiv = doc.select("div.authors").get(0);
        for (Element degreeSpan : authorsDiv.select("span.contribDegrees")) {
            final String name = degreeSpan.children().get(0).text();
            if (name == null || name.equals("")) {
                continue;
            }
            names.add(name);
        }
    } catch (Exception ex) {
        logger.error("Cannot get the authors for SAGE article!", ex);
        return null;
    }

    if (names.isEmpty()) {
        return null;
    }
    return names.toArray(new String[names.size()]);
}