Example usage for org.jsoup.nodes Element nextElementSibling

List of usage examples for org.jsoup.nodes Element nextElementSibling

Introduction

On this page you can find example usage for org.jsoup.nodes Element nextElementSibling.

Prototype

public Element nextElementSibling() 

Source Link

Document

Gets the next sibling element of this element.

Usage

From source file:gov.medicaid.screening.dao.impl.BBHTLicenseDAOBean.java

/**
 * Performs a search for all possible results.
 *
 * @param criteria The search criteria./*from ww w  .  j a  v a2 s.co m*/
 * @param byName flag indicating it is a name search
 * @return the search result for licenses
 *
 * @throws URISyntaxException if an error occurs while building the URL.
 * @throws ClientProtocolException if client does not support protocol used.
 * @throws IOException if an error occurs while parsing response.
 * @throws ParseException if an error occurs while parsing response.
 * @throws ServiceException for any other problems encountered
 */
private SearchResult<License> getAllResults(BBHTLicenseSearchCriteria criteria, boolean byName)
        throws URISyntaxException, ClientProtocolException, IOException, ParseException, ServiceException {
    DefaultHttpClient client = new DefaultHttpClient(getLaxSSLConnectionManager());
    client.setRedirectStrategy(new LaxRedirectStrategy());

    HttpGet getSearch = new HttpGet(new URIBuilder(getSearchURL()).build());
    HttpResponse response = client.execute(getSearch);
    verifyAndAuditCall(getSearchURL(), response);

    Document page = Jsoup.parse(EntityUtils.toString(response.getEntity()));

    HttpPost search = new HttpPost(new URIBuilder(getSearchURL()).build());

    List<License> allLicenses = new ArrayList<License>();

    // switch to search by name screen
    if (byName) {
        HttpEntity entity = postForm(getSearchURL(), client, search,
                new String[][] { { "__EVENTTARGET", "_ctl7_rbtnSearch_1" }, { "__EVENTARGUMENT", "" },
                        { "_ctl7:ddlbLicenseType", "CD" }, { "_ctl7:rbtnSearch", "2" },
                        { "_ctl7:txtLicenseNumber", "" },
                        { "__VIEWSTATE", page.select("input[name=__VIEWSTATE]").first().val() } },
                true);

        page = Jsoup.parse(EntityUtils.toString(entity));
        entity = getResultPage(criteria, client, page, search, "_ctl7:cmdSearch", getSearchURL());
        page = Jsoup.parse(EntityUtils.toString(entity));

        // get the data grid entries
        if (page.select("table#_ctl7_grdSearchResults").size() < 1) {
            throw new ParsingException(ErrorCode.MITA50002.getDesc());
        }

        Elements rows = page.select(GRID_ROW_SELECTOR);
        while (rows.size() > 0) {
            for (Element row : rows) {
                String url = row.select("a").first().attr("href");
                String licenseNo = row.select("td:eq(5)").text();
                HttpGet getDetail = new HttpGet(Util.replaceLastURLPart(getSearchURL(), url));
                response = client.execute(getDetail);
                verifyAndAuditCall(getSearchURL(), response);
                Document licenseDetails = Jsoup.parse(EntityUtils.toString(response.getEntity()));
                allLicenses.add(parseLicense(licenseDetails, licenseNo));
            }
            rows.clear();

            // check for next page
            Element currentPage = page.select("#_ctl7_grdSearchResults tr.TablePager span").first();
            if (getLog() != null) {
                getLog().log(Level.DEBUG, "Current page is: " + currentPage.text());
            }
            Element pageLink = currentPage.nextElementSibling();
            if (pageLink != null && pageLink.hasAttr("href")) {
                if (getLog() != null) {
                    getLog().log(Level.DEBUG, "There are more results, getting the next page.");
                }

                String target = parseEventTarget(pageLink.attr("href"));
                entity = getResultPage(criteria, client, page, search, target, getSearchURL());
                page = Jsoup.parse(EntityUtils.toString(entity));
                rows = page.select(GRID_ROW_SELECTOR);
            }
        }

    } else { // search by license number (site supports only exact match)
        HttpEntity entity = postForm(getSearchURL(), client, search,
                new String[][] { { "__EVENTTARGET", "_ctl7:cmdSearch" }, { "__EVENTARGUMENT", "" },
                        { "_ctl7:ddlbLicenseType", Util.defaultString(criteria.getLicenseType().getName()) },
                        { "_ctl7:rbtnSearch", "1" },
                        { "_ctl7:txtLicenseNumber", Util.defaultString(criteria.getIdentifier()) },
                        { "__VIEWSTATE", page.select("input[name=__VIEWSTATE]").first().val() } },
                true);

        page = Jsoup.parse(EntityUtils.toString(entity));
        if (page.select("span#lblFormTitle").text().equals("License Details")) {
            String prefLicenseNo = criteria.getIdentifier();
            allLicenses.add(parseLicense(page, prefLicenseNo));
        }
    }

    SearchResult<License> searchResult = new SearchResult<License>();
    searchResult.setItems(allLicenses);
    return searchResult;
}

From source file:net.pixomania.crawler.W3C.parser.rules.editors.EditorsRule2.java

/**
 * Collects editors from the {@code dd} entries that follow a {@code dt} containing "Editor" or
 * "Edition Editor".
 *
 * <p>Entries under a heading that is not an editor heading (or that mentions a version or ends
 * with "draft:") are skipped until an entry is followed by an element whose text looks like an
 * editor heading again. An entry containing more than two " - " separators hands the whole
 * document over to {@code EditorsRule5}.
 *
 * @param url the spec URL; used in warning messages and passed on to {@code EditorsRule5}
 * @param doc the parsed document to search
 * @return the editors found, or {@code null} when nothing matches or nothing could be parsed
 */
@Override
public ArrayList<Person> run(String url, Document doc) {
    Elements candidates = doc.select("dt:contains(Editor) ~ dd, dt:contains(Edition Editor) ~ dd");
    if (candidates.isEmpty()) {
        return null;
    }

    ArrayList<Person> found = new ArrayList<>();
    boolean skipping = false;
    for (Element entry : candidates) {
        // decide from the preceding heading whether this entry belongs to an editor section
        Element heading = entry.previousElementSibling();
        if (heading.tagName().equals("dt")) {
            String label = heading.text().trim().toLowerCase();
            boolean editorLabel = label.startsWith("editor") || label.startsWith("edition editor");
            if (!editorLabel || label.contains("version") || label.endsWith("draft:")) {
                skipping = true;
            }
        }

        if (skipping) {
            // stop skipping once the element after this entry reads like an editor heading
            Element following = entry.nextElementSibling();
            if (following != null) {
                String followingText = following.text().trim().toLowerCase();
                if (followingText.startsWith("editor") || followingText.contains("edition editor")) {
                    skipping = false;
                }
            }
            continue;
        }

        String entryText = entry.text();
        if (StringUtils.countMatches(entryText, " - ") > 2) {
            Log.log("warning", "This editor may be a list of editors separated by  - ");
            return new EditorsRule5().run(url, doc);
        }

        String[] parts = entry.html().split("<br />|<br clear=\"none\" />");
        if (parts.length < 2) {
            // a single person in this entry
            String lowered = entryText.toLowerCase();
            if (lowered.startsWith("(in alphabetic")
                    || lowered.startsWith("see acknowl")
                    || lowered.startsWith("the w3")
                    || lowered.startsWith("(see ac")
                    || lowered.startsWith("see participants")
                    || lowered.contains("note:")) {
                Log.log("warning", "Spec " + url + " may refer to a different section!");
                continue;
            }
            if (entryText.equals("WHATWG:") || entryText.equals("W3C:")) {
                continue;
            }
            Person person = NameParser.parse(entryText);
            if (person == null) {
                continue;
            }

            // links with an "@" are taken as e-mail addresses, all other non-empty links as websites
            for (Element anchor : entry.select("a")) {
                String href = anchor.attr("href");
                if (href.isEmpty()) {
                    continue;
                }
                if (href.contains("@")) {
                    person.setEmail(href.replace("mailto:", ""));
                } else {
                    person.addWebsite(href);
                }
            }

            found.add(person);
        } else {
            // several people separated by line breaks: handle each fragment on its own
            for (String part : parts) {
                if (part.isEmpty()) {
                    continue;
                }
                String lowered = part.toLowerCase();
                if (lowered.startsWith("(in alphabetic")
                        || lowered.startsWith("see acknowl")
                        || lowered.startsWith("the w3")
                        || lowered.startsWith("(see ac")
                        || lowered.startsWith("see participants")
                        || lowered.contains("note:")) {
                    Log.log("warning", "Spec " + url + " may refer to a different section!");
                    continue;
                }
                if (part.equals("WHATWG:") || part.equals("W3C:")) {
                    continue;
                }
                Document fragment = Jsoup.parse(part.replaceAll("\n", ""));
                Person person = NameParser.parse(fragment.text());
                if (person == null) {
                    continue;
                }

                for (Element anchor : fragment.select("a")) {
                    String href = anchor.attr("href");
                    if (href.isEmpty()) {
                        continue;
                    }
                    if (href.contains("@")) {
                        person.setEmail(href.replace("mailto:", ""));
                    } else {
                        person.addWebsite(href);
                    }
                }

                found.add(person);
            }
        }

        // the next heading ends the editor list
        Element following = entry.nextElementSibling();
        if (following != null && following.tag().getName().equals("dt")) {
            break;
        }
    }

    return found.isEmpty() ? null : found;
}

From source file:net.pixomania.crawler.W3C.parser.rules.editors.EditorsRule8.java

/**
 * Collects editors from the {@code blockquote} entries that follow an {@code h4} containing
 * "Editor".
 *
 * <p>Entries under a heading that is not an editor heading (or that ends with "version:" or
 * "draft:") are skipped until an entry is followed by an element whose text looks like an editor
 * heading again. An entry containing more than two " - " separators hands the whole document
 * over to {@code EditorsRule5}.
 *
 * @param url the spec URL; used in warning messages and passed on to {@code EditorsRule5}
 * @param doc the parsed document to search
 * @return the editors found, or {@code null} when nothing matches or nothing could be parsed
 */
@Override
public ArrayList<Person> run(String url, Document doc) {
    Elements candidates = doc.select("h4:contains(Editor) ~ blockquote");
    if (candidates.isEmpty()) {
        return null;
    }

    ArrayList<Person> found = new ArrayList<>();
    boolean skipping = false;
    for (Element entry : candidates) {
        // decide from the preceding heading whether this entry belongs to an editor section
        Element heading = entry.previousElementSibling();
        if (heading.tagName().equals("h4")) {
            String label = heading.text().trim().toLowerCase();
            boolean editorLabel = label.startsWith("editor") || label.startsWith("edition editor");
            if (!editorLabel || label.endsWith("version:") || label.endsWith("draft:")) {
                skipping = true;
            }
        }

        if (skipping) {
            // stop skipping once the element after this entry reads like an editor heading
            Element following = entry.nextElementSibling();
            if (following != null) {
                String followingText = following.text().trim().toLowerCase();
                if (followingText.startsWith("editor") || followingText.contains("edition editor")) {
                    skipping = false;
                }
            }
            continue;
        }

        String entryText = entry.text();
        if (StringUtils.countMatches(entryText, " - ") > 2) {
            Log.log("warning", "This editor may be a list of editors separated by  - ");
            return new EditorsRule5().run(url, doc);
        }

        String[] parts = entry.html().split("<br />|<br clear=\"none\" />");
        if (parts.length < 2) {
            // a single person in this entry
            String lowered = entryText.toLowerCase();
            if (lowered.startsWith("(in alphabetic")
                    || lowered.startsWith("see acknowl")
                    || lowered.startsWith("the w3")
                    || lowered.startsWith("(see ac")
                    || lowered.startsWith("see participants")
                    || lowered.contains("note:")) {
                Log.log("warning", "Spec " + url + " may refer to a different section!");
                continue;
            }
            if (entryText.equals("WHATWG:") || entryText.equals("W3C:")) {
                continue;
            }
            Person person = NameParser.parse(entryText);
            if (person == null) {
                continue;
            }

            // links with an "@" are taken as e-mail addresses, all other non-empty links as websites
            for (Element anchor : entry.select("a")) {
                String href = anchor.attr("href");
                if (href.isEmpty()) {
                    continue;
                }
                if (href.contains("@")) {
                    person.setEmail(href.replace("mailto:", ""));
                } else {
                    person.addWebsite(href);
                }
            }

            found.add(person);
        } else {
            // several people separated by line breaks: handle each fragment on its own
            for (String part : parts) {
                if (part.isEmpty()) {
                    continue;
                }
                String lowered = part.toLowerCase();
                if (lowered.startsWith("(in alphabetic")
                        || lowered.startsWith("see acknowl")
                        || lowered.startsWith("the w3")
                        || lowered.startsWith("(see ac")
                        || lowered.startsWith("see participants")
                        || lowered.contains("note:")) {
                    Log.log("warning", "Spec " + url + " may refer to a different section!");
                    continue;
                }
                if (part.equals("WHATWG:") || part.equals("W3C:")) {
                    continue;
                }
                Document fragment = Jsoup.parse(part.replaceAll("\n", ""));
                Person person = NameParser.parse(fragment.text());
                if (person == null) {
                    continue;
                }

                for (Element anchor : fragment.select("a")) {
                    String href = anchor.attr("href");
                    if (href.isEmpty()) {
                        continue;
                    }
                    if (href.contains("@")) {
                        person.setEmail(href.replace("mailto:", ""));
                    } else {
                        person.addWebsite(href);
                    }
                }

                found.add(person);
            }
        }

        // the next heading ends the editor list
        Element following = entry.nextElementSibling();
        if (following != null && following.tag().getName().equals("h4")) {
            break;
        }
    }

    return found.isEmpty() ? null : found;
}

From source file:gov.medicaid.screening.dao.impl.NursingLicenseDAOBean.java

/**
 * Performs a search for all possible results.
 *
 * @param criteria The search criteria./*from   ww  w.  j  a  v a2  s  .  c o  m*/
 * @param byName flag indicating it is a name search
 * @return the search result for licenses
 *
 * @throws URISyntaxException if an error occurs while building the URL.
 * @throws ClientProtocolException if client does not support protocol used.
 * @throws IOException if an error occurs while parsing response.
 * @throws ParseException if an error occurs while parsing response.
 * @throws ServiceException for any other problems encountered
 */
private SearchResult<License> getAllResults(NursingLicenseSearchCriteria criteria, boolean byName)
        throws URISyntaxException, ClientProtocolException, IOException, ParseException, ServiceException {
    DefaultHttpClient client = new DefaultHttpClient(getLaxSSLConnectionManager());
    client.setRedirectStrategy(new LaxRedirectStrategy());
    client.setCookieStore(loginAsPublicUser());

    HttpGet getSearch = new HttpGet(new URIBuilder(getSearchURL()).build());
    HttpResponse response = client.execute(getSearch);
    verifyAndAuditCall(getSearchURL(), response);

    Document page = Jsoup.parse(EntityUtils.toString(response.getEntity()));

    HttpPost search = new HttpPost(new URIBuilder(getSearchURL()).build());

    List<License> allLicenses = new ArrayList<License>();

    // switch to search by name screen
    if (byName) {
        HttpEntity entity = postForm(getSearchURL(), client, search,
                new String[][] { { "__EVENTTARGET", "_ctl7_rbtnSearch_1" }, { "__EVENTARGUMENT", "" },
                        { "_ctl7:ddlbLicenseType", "R" }, { "_ctl7:rbtnSearch", "2" },
                        { "_ctl7:txtCheckDigit", "" }, { "_ctl7:txtLicenseNumber", "" },
                        { "__VIEWSTATE", page.select("input[name=__VIEWSTATE]").first().val() } },
                true);

        page = Jsoup.parse(EntityUtils.toString(entity));
        entity = getResultPage(criteria, client, page, search, "_ctl7:cmdSearch", getSearchURL());
        page = Jsoup.parse(EntityUtils.toString(entity));

        // get the data grid entries
        if (page.select("table#_ctl7_grdSearchResults").size() < 1) {
            throw new ParsingException(ErrorCode.MITA50002.getDesc());
        }

        Elements rows = page.select(GRID_ROW_SELECTOR);
        while (rows.size() > 0) {
            for (Element row : rows) {
                String url = row.select("a").first().attr("href");
                String licenseNo = row.select("td:eq(4)").text();
                HttpGet getDetail = new HttpGet(Util.replaceLastURLPart(getSearchURL(), url));
                response = client.execute(getDetail);
                verifyAndAuditCall(getSearchURL(), response);
                Document licenseDetails = Jsoup.parse(EntityUtils.toString(response.getEntity()));
                allLicenses.add(parseLicense(licenseDetails, licenseNo.substring(0, 1)));
            }
            rows.clear();

            // check for next page
            Element currentPage = page.select("#_ctl7_grdSearchResults tr.TablePager span").first();
            if (getLog() != null) {
                getLog().log(Level.DEBUG, "Current page is: " + currentPage.text());
            }
            Element pageLink = currentPage.nextElementSibling();
            if (pageLink != null && pageLink.hasAttr("href")) {
                if (getLog() != null) {
                    getLog().log(Level.DEBUG, "There are more results, getting the next page.");
                }

                String target = parseEventTarget(pageLink.attr("href"));
                entity = getResultPage(criteria, client, page, search, target, getSearchURL());
                page = Jsoup.parse(EntityUtils.toString(entity));
                rows = page.select(GRID_ROW_SELECTOR);
            }
        }

    } else { // search by license number (site supports only exact match)

        HttpEntity entity = postForm(getSearchURL(), client, search,
                new String[][] { { "__EVENTTARGET", "_ctl7:cmdSearch" }, { "__EVENTARGUMENT", "" },
                        { "_ctl7:ddlbLicenseType", Util.defaultString(criteria.getLicenseType().getName()) },
                        { "_ctl7:rbtnSearch", "1" },
                        { "_ctl7:txtCheckDigit", Util.defaultString(criteria.getCheckDigit()) },
                        { "_ctl7:txtLicenseNumber", Util.defaultString(criteria.getIdentifier()) },
                        { "__VIEWSTATE", page.select("input[name=__VIEWSTATE]").first().val() } },
                true);

        page = Jsoup.parse(EntityUtils.toString(entity));
        if (page.select("span#lblFormTitle").text().equals("License Details")) {
            String prefLicenseType = criteria.getLicenseType().getName();
            allLicenses.add(parseLicense(page, prefLicenseType));
        }
    }

    SearchResult<License> searchResult = new SearchResult<License>();
    searchResult.setItems(allLicenses);
    return searchResult;
}

From source file:com.johan.vertretungsplan.parser.UntisInfoParser.java

/**
 * Logs in, reads the available weeks from the navigation bar and parses the substitution plan
 * pages of every week.
 *
 * <p>When the {@code single_classes} option is set, one page per class is fetched and each day
 * is stored through {@code writeTagByDatum}; otherwise a single overview page per week is fetched
 * and each day is appended to the list. The "Stand:" text from the navigation bar description
 * (empty if absent) is set on every day.
 *
 * @return the plan holding all parsed days
 * @throws IOException declared by the signature; presumably raised while fetching pages - TODO confirm
 * @throws JSONException if the {@code encoding} entry is missing from the school data
 */
@Override
public Vertretungsplan getVertretungsplan() throws IOException, JSONException {
    new LoginHandler(schule).handleLogin(executor, cookieStore, username, password);

    Document navbarDoc = Jsoup.parse(getNavbarDoc().replace("&nbsp;", ""));
    Element weekSelect = navbarDoc.select("select[name=week]").first();

    Vertretungsplan plan = new Vertretungsplan();
    List<VertretungsplanTag> tage = new ArrayList<VertretungsplanTag>();

    // keep everything from "Stand:" onwards, or nothing when the marker is missing
    String info = navbarDoc.select(".description").text();
    int standIndex = info.indexOf("Stand:");
    String stand = standIndex >= 0 ? info.substring(standIndex) : "";

    for (Element option : weekSelect.children()) {
        String week = option.attr("value");
        String letter = data.optString("letter", "w");
        // the two directory levels appear in either order depending on configuration
        String directory = data.optBoolean("w_after_number", false)
                ? week + "/" + letter
                : letter + "/" + week;

        if (data.optBoolean("single_classes", false)) {
            int classNumber = 1;
            for (String klasse : getAllClasses()) {
                String paddedNumber = String.format("%05d", classNumber);
                String url = baseUrl + "/" + directory + "/" + letter + paddedNumber + ".htm";

                Document doc = Jsoup.parse(httpGet(url, schule.getData().getString("encoding")));
                for (Element day : doc.select("#vertretung > p > b, #vertretung > b")) {
                    String datum = day.text();
                    VertretungsplanTag tag = getTagByDatum(tage, datum);
                    tag.setStand(stand);
                    tag.setDatum(datum);

                    Element container = day.parent();
                    Element next = container.tagName().equals("p")
                            ? container.nextElementSibling().nextElementSibling()
                            : container.select("p").first().nextElementSibling();

                    if (!next.className().equals("subst")) {
                        // messages first, the substitution table follows two elements later
                        parseNachrichten(next, data, tag);
                        next = next.nextElementSibling().nextElementSibling();
                        parseVertretungsplanTable(next, data, tag);
                    } else if (next.text().contains("Vertretungen sind nicht freigegeben")) {
                        // substitutions not released for this day: nothing to store
                        continue;
                    } else {
                        // substitution table
                        parseVertretungsplanTable(next, data, tag);
                    }
                    writeTagByDatum(tage, tag);
                }

                classNumber++;
            }
        } else {
            String url = baseUrl + "/" + directory + "/" + letter + "00000.htm";

            Document doc = Jsoup.parse(httpGet(url, schule.getData().getString("encoding")));
            for (Element day : doc.select("#vertretung > p > b, #vertretung > b")) {
                String datum = day.text();
                VertretungsplanTag tag = getTagByDatum(tage, datum);
                tag.setStand(stand);
                tag.setDatum(datum);

                Element container = day.parent();
                Element next = container.tagName().equals("p")
                        ? container.nextElementSibling().nextElementSibling()
                        : container.select("p").first().nextElementSibling();

                if (!next.className().equals("subst")) {
                    // messages first, the substitution table follows two elements later
                    parseNachrichten(next, data, tag);
                    next = next.nextElementSibling().nextElementSibling();
                    parseVertretungsplanTable(next, data, tag);
                } else if (next.text().contains("Vertretungen sind nicht freigegeben")) {
                    // substitutions not released for this day: nothing to store
                    continue;
                } else {
                    // substitution table
                    parseVertretungsplanTable(next, data, tag);
                }
                tage.add(tag);
            }
        }
        plan.setTage(tage);
    }
    return plan;
}

From source file:com.jimplush.goose.ContentExtractor.java

/**
 * Decides whether a node may be boosted, based on the paragraphs that follow it.
 *
 * <p>A lot of times the first paragraph is just the caption under an image, so a node is only
 * boosted when a following sibling paragraph carries substantial weight: the following siblings
 * are walked in order, and the first {@code p} element with more than five stop words makes the
 * node boostable. Reaching a {@code p} element three or more siblings away ends the search.
 *
 * @param node the element whose following siblings are inspected
 * @return {@code true} if a nearby following paragraph has more than five stop words,
 *         {@code false} otherwise
 */
private boolean isOkToBoost(Element node) {
    int distance = 0;

    for (Element candidate = node.nextElementSibling(); candidate != null;
            candidate = candidate.nextElementSibling(), distance++) {

        // only paragraphs count; other siblings merely add distance
        if (!candidate.tagName().equals("p")) {
            continue;
        }

        if (distance >= 3) {
            if (logger.isDebugEnabled()) {
                logger.debug("Next paragraph is too far away, not boosting");
            }
            return false;
        }

        WordStats stats = StopWords.getStopWordCount(candidate.text());
        if (stats.getStopWordCount() > 5) {
            if (logger.isDebugEnabled()) {
                logger.debug("We're gonna boost this node, seems contenty");
            }
            return true;
        }
    }

    return false;
}

From source file:me.vertretungsplan.parser.DaVinciParser.java

/**
 * Parses one DaVinci page into the given schedule.
 *
 * <p>The page title is either a date or a class name. With a date title, or a class title that
 * is followed by an element of the form "&lt;word&gt; &lt;date&gt;", a single day is built,
 * filled with messages, last-change time and the table contents, and added to the schedule.
 * Otherwise the page is treated as covering multiple days: no day object exists, messages are
 * not recorded, and the last-change time is stored on the schedule itself.
 *
 * @param doc the page element to parse
 * @param schedule the schedule receiving the parsed day and/or last-change time
 * @throws IOException declared by the signature; presumably raised by the table parser - TODO confirm
 */
@NotNull
void parsePage(Element doc, SubstitutionSchedule schedule) throws IOException {
    SubstitutionScheduleDay day = new SubstitutionScheduleDay();

    Element titleElem;
    if (doc.select("h1.list-table-caption").size() > 0) {
        titleElem = doc.select("h1.list-table-caption").first();
    } else {
        // DaVinci 5
        titleElem = doc.select("h2").first();
    }
    String title = titleElem.text();
    String klasse = null;
    // title can either be date or class
    Pattern datePattern = Pattern.compile("\\d+\\.\\d+.\\d{4}");
    Matcher dateMatcher = datePattern.matcher(title);
    if (dateMatcher.find()) {
        day.setDateString(dateMatcher.group());
        day.setDate(ParserUtils.parseDate(dateMatcher.group()));
    } else {
        klasse = title;
        // the title may be the last element of its parent; treat that like "no date found"
        Element nextElem = titleElem.nextElementSibling();
        String nextText = nextElem != null ? nextElem.text() : "";
        if (nextText.matches("\\w+ \\d+\\.\\d+.\\d{4}")) {
            day.setDateString(nextText);
            day.setDate(ParserUtils.parseDate(nextText));
        } else {
            // could not find date, must be multiple days
            day = null;
        }
    }

    for (Element p : doc.select(".row:has(h1.list-table-caption) p")) {
        for (TextNode node : p.textNodes()) {
            if (!node.text().trim().isEmpty() && day != null)
                day.addMessage(node.text().trim());
        }
    }
    for (Element message : doc.select(".callout")) {
        for (TextNode node : message.textNodes()) {
            // day is null on multi-day pages; skip messages there, as the loop above does
            if (!node.text().trim().isEmpty() && day != null)
                day.addMessage(node.text().trim());
        }
    }

    Element lastChangeElem = doc.select(".row.copyright div").first();
    if (lastChangeElem == null) {
        // DaVinci 5
        lastChangeElem = doc.select("h1").first();
    }
    String lastChange = lastChangeElem.ownText();
    // two timestamp layouts are recognised: "dd-MM-yyyy HH:mm |" and "dd.MM.yyyy | HH:mm"
    Pattern pattern = Pattern.compile("(\\d{2}-\\d{2}-\\d{4} \\d{2}:\\d{2}) \\|");
    Matcher matcher = pattern.matcher(lastChange);
    if (matcher.find()) {
        LocalDateTime lastChangeTime = DateTimeFormat.forPattern("dd-MM-yyyy HH:mm")
                .parseLocalDateTime(matcher.group(1));
        if (day != null) {
            day.setLastChange(lastChangeTime);
        } else {
            schedule.setLastChange(lastChangeTime);
        }
    } else {
        Pattern pattern2 = Pattern.compile("(\\d{2}.\\d{2}.\\d{4} \\| \\d+:\\d{2})");
        Matcher matcher2 = pattern2.matcher(lastChange);
        if (matcher2.find()) {
            LocalDateTime lastChangeTime = DateTimeFormat.forPattern("dd.MM.yyyy | HH:mm")
                    .parseLocalDateTime(matcher2.group(1));
            if (day != null) {
                day.setLastChange(lastChangeTime);
            } else {
                schedule.setLastChange(lastChangeTime);
            }
        }
    }

    // parse the table unless the page only carries a "Es liegen keine ..." callout
    if (doc.select(".list-table").size() > 0 || !doc.select(".callout").text().contains("Es liegen keine")) {
        Element table = doc.select(".list-table, table").first();
        parseDaVinciTable(table, schedule, klasse, day, colorProvider);
    }

    if (day != null) {
        schedule.addDay(day);
    }
}

From source file:crawler.HackerEarthCrawler.java

/**
 * Crawls HackerEarth starting from {@code baseUrl} and stores what it finds.
 *
 * <p>The method runs in three phases:
 * <ol>
 * <li>Repeatedly fetches every not-yet-crawled URL under {@code https://www.hackerearth.com/},
 * collecting links accepted by {@code isProblemUrl} into a problem set, links ending in
 * {@code /tutorial/} into a tutorial set, and links accepted by {@code isCrawlable} into the
 * set of pages to crawl next. The loop ends when a full pass crawls nothing new.</li>
 * <li>For every problem URL, scrapes title, statement, input/output format, constraints, tags,
 * sample input/output, explanation and time limit from the page text and saves (or replaces)
 * the corresponding {@code Problem} through Hibernate. A failure on one problem is printed and
 * the loop moves on to the next problem.</li>
 * <li>For every tutorial URL, stores the tutorial text as a {@code Tutorial} named after the
 * last path segment before {@code /tutorial/}.</li>
 * </ol>
 */
@Override
public void crawl() {

    int flag = 0;

    //set of urls which should be crawled
    TreeSet<String> linksset = new TreeSet<String>();
    TreeSet<String> tempset = new TreeSet<String>();
    TreeSet<String> tutorialset = new TreeSet<String>();
    //final set of problem urls
    TreeSet<String> problemset = new TreeSet<String>();
    //visited maps a url to 1 if it has already been crawled and to 0 otherwise
    TreeMap<String, Integer> visited = new TreeMap<String, Integer>();

    //add base url
    linksset.add(baseUrl);
    //mark base url as not crawled
    visited.put(baseUrl, 0);

    try {
        while (true) {
            flag = 0;
            tempset.clear();

            for (String str : linksset) {
                //check if url is already crawled or not and it has valid domain name
                if ((visited.get(str) == 0) && (str.startsWith("https://www.hackerearth.com/"))) {
                    System.out.println("crawling  " + str);

                    //retrieving response of current url as document
                    Document doc = Jsoup.connect(str).timeout(0).userAgent(
                            "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0")
                            .referrer("http://www.google.com").ignoreHttpErrors(true).get();
                    //retrieving all urls from current page
                    Elements links = doc.select("a[href]");

                    //mark url as crawled
                    visited.put(str, 1);

                    //mark flag as url is crawled
                    flag = 1;
                    //retrieve all urls
                    for (Element link : links) {
                        //resolve the absolute url once instead of on every check below
                        String href = link.absUrl("href");

                        if (href.endsWith("/tutorial/")) {
                            tutorialset.add(href);
                        }
                        //check if url is problem url then add it in problemurlset
                        if (href.startsWith("https://www.hackerearth.com/") && isProblemUrl(href)) {
                            problemset.add(href);
                        }
                        //check if url has valid domain and it has problem urls or not
                        if (href.contains(("https://www.hackerearth.com/")) && isCrawlable(href)) {
                            //if link is not visited then mark it as uncrawled
                            if (!visited.containsKey(href)) {
                                visited.put(href, 0);
                            }
                            //add it in temporary set; it is merged into linksset after this pass
                            //because linksset must not be modified while it is being iterated
                            tempset.add(href);
                        }
                    }
                }
            }
            //if nothing is left to crawl break the loop
            if (flag == 0) {
                break;
            }
            //add all retrieved links to linksset
            linksset.addAll(tempset);
        }

        System.out.println("\n\ntotal problem urls " + problemset.size());

        int i = 0;
        for (String str : problemset) {
            System.out.println("link " + i + " : " + str);
            i++;
        }

    } catch (IOException ex) {
        Logger.getLogger(HackerEarthCrawler.class.getName()).log(Level.SEVERE, null, ex);
    }

    //scrap and store into database
    //for every problem url scrap problem page
    for (String problemUrl : problemset) {

        System.out.println("problemUrl :" + problemUrl);
        try {
            //create problem class to store in database
            Problem problem = new Problem();
            String problemSIOC = "", problemIOC = "";
            String problemTitle = "", problemStatement = "", problemInput = "", problemOutput = "",
                    problemConstraints = "";
            String sampleInput = "", sampleOutput = "";
            String problemExplanation = "";
            //set default timelimit to 1 second
            double problemTimeLimit = 1.0;
            ArrayList<String> tags = new ArrayList<String>();

            //get response for given problem url
            Response response = Jsoup.connect(problemUrl).execute();
            Document doc = response.parse();

            //retrieve problem title from page (text before the first '|' of the <title>)
            Element elementTitle = doc.getElementsByTag("title").first();
            StringTokenizer stTitle = new StringTokenizer(elementTitle.text(), "|");
            problemTitle = stTitle.nextToken().trim();

            Element content = doc.getElementsByClass("starwars-lab").first();
            //whole text of the problem: statement + input + output + constraints
            problemSIOC = content.text();
            Elements contentChildren = content.children();

            //to find problem statement: collect child texts until a heading from breakloop is met
            String breakloop[] = { "input", "input:", "input :", "input format:", "input format :",
                    "input format", "Input and output", "constraints :", "constraints:", "constraints",
                    "$$Input :$$" };
            flag = 0;
            for (Element p : contentChildren) {
                String tempStatement = "";
                for (Element pp : p.getAllElements()) {

                    for (String strbreak : breakloop) {
                        if (StringUtils.equalsIgnoreCase(pp.ownText(), strbreak)) {
                            //keep only the part of this child that precedes the heading
                            tempStatement = p.text().substring(0,
                                    p.text().toLowerCase().indexOf(strbreak.toLowerCase()));
                            flag = 1;
                            break;
                        }
                    }
                }

                if (flag == 1) {
                    problemStatement += tempStatement;
                    //remove extra space at end
                    if (tempStatement.length() == 0) {
                        problemStatement = problemStatement.substring(0, problemStatement.length() - 1);
                    }
                    break;
                }
                problemStatement += p.text() + " ";
            }

            System.out.println("problemSIOC :" + problemSIOC);
            System.out.println("problemStatement :" + problemStatement);

            if (problemStatement.length() <= problemSIOC.length()) {
                //remove problem statement from whole text and remove extra spaces at the beginning and the end
                problemIOC = problemSIOC.substring(problemStatement.length()).trim();
            } else {
                problemIOC = "";
            }

            System.out.println("problemIOC :" + problemIOC);

            //keywords for identifying input
            //NOTE: index 6 ("input and output") is tested explicitly below; keep the order stable
            String decideInput[] = { "Input format :", "Input format:", "Input format", "inputformat:",
                    "inputformat :", "inputformat", "input and output", "input :", "input:", "input" };
            //keywords for identifying output
            String decideOutput[] = { "output format :", "output format:", "Output format", "outputformat:",
                    "outputformat :", "outputformat", "output :", "output:", "output" };
            //keywords for identifying constraint
            String decideConstraint[] = { "constraints:", "constraints :", "constraints", "Constraints :",
                    "constraint:", "constraint :", "constraint", "Contraints :" };

            int posin = 0, posoutput = 0, poscon = 0, idxin, idxout, idxcon, flaginput = 0, flagoutput = 0,
                    flagcon = 0, inlen = 0, outlen = 0, conlen = 0;

            //find inputformat position,length of keyword
            for (idxin = 0; idxin < decideInput.length; idxin++) {
                if (StringUtils.containsIgnoreCase(problemIOC, decideInput[idxin])) {

                    posin = problemIOC.toLowerCase().indexOf(decideInput[idxin].toLowerCase());
                    flaginput = 1;
                    inlen = decideInput[idxin].length();

                    //decide whether it is the keyword for the actual input or part of "sample input"
                    if (StringUtils.containsIgnoreCase(problemIOC, "sample input")) {
                        if (posin > problemIOC.toLowerCase().indexOf("sample input")) {
                            flaginput = 0;
                            inlen = 0;
                        } else {
                            break;
                        }
                    } else {
                        break;
                    }
                }
            }

            //find outputformat position,length of keyword
            for (idxout = 0; idxout < decideOutput.length; idxout++) {
                if (StringUtils.containsIgnoreCase(problemIOC, decideOutput[idxout])) {
                    posoutput = problemIOC.toLowerCase().indexOf(decideOutput[idxout].toLowerCase());
                    flagoutput = 1;
                    outlen = decideOutput[idxout].length();
                    break;
                }
            }

            //find constraint position,length of keyword
            for (idxcon = 0; idxcon < decideConstraint.length; idxcon++) {
                if (StringUtils.containsIgnoreCase(problemIOC, decideConstraint[idxcon])) {
                    poscon = problemIOC.toLowerCase().indexOf(decideConstraint[idxcon].toLowerCase());
                    flagcon = 1;
                    conlen = decideConstraint[idxcon].length();
                    break;
                }
            }

            System.out.println("input " + flaginput + " " + inlen + " " + posin);
            System.out.println("output " + flagoutput + " " + outlen + " " + posoutput);
            System.out.println("constraint " + flagcon + " " + conlen + " " + poscon);
            //retrieve problem input and output if present in problem page

            //if input format is present
            if (flaginput == 1) {
                //if input keyword is "input and output" and constraint is present in problem page
                if (idxin == 6 && flagcon == 1) {
                    problemInput = problemIOC.substring(inlen, poscon);
                }
                //if input keyword is "input and output" and constraint is not present in problem page
                else if (idxin == 6 && flagcon == 0) {
                    problemInput = problemIOC.substring(inlen);
                }
                //if output format and constraint is present
                else if (flagoutput == 1 && flagcon == 1) {
                    //if constraint is present before input format
                    if (poscon < posin) {
                        problemInput = problemIOC.substring(posin + inlen, posoutput);
                        problemOutput = problemIOC.substring(posoutput + outlen);
                    }
                    //if constraint is present before sample
                    else if (poscon < posoutput) {
                        problemInput = problemIOC.substring(inlen, poscon);
                        problemOutput = problemIOC.substring(posoutput + outlen);
                    } else {
                        problemInput = problemIOC.substring(inlen, posoutput);
                        problemOutput = problemIOC.substring(posoutput + outlen, poscon);
                    }
                }
                //if constraint is not present
                else if (flagoutput == 1 && flagcon == 0) {
                    problemInput = problemIOC.substring(inlen, posoutput);
                    problemOutput = problemIOC.substring(posoutput + outlen);
                } else if (flagoutput == 0 && flagcon == 1) {
                    if (poscon < posin) {
                        problemInput = problemIOC.substring(posin + inlen);
                    } else {
                        problemInput = problemIOC.substring(poscon + conlen, posin);
                    }
                    problemOutput = "";
                } else {
                    problemInput = problemIOC.substring(inlen);
                    problemOutput = "";
                }
            }
            //if input format and output format is not present
            else {
                problemInput = "";
                problemOutput = "";
            }

            //if constraint is present
            if (flagcon == 1) {
                //if constraint is present before input format
                if (poscon < posin) {
                    problemConstraints = problemIOC.substring(0, posin);
                }
                //if constraint is present before output format
                else if (poscon < posoutput) {
                    problemConstraints = problemIOC.substring(poscon + conlen, posoutput);
                } else {
                    problemConstraints = problemIOC.substring(poscon + conlen);
                }
            }

            System.out.println("problemInput :" + problemInput);
            System.out.println("problemOutput :" + problemOutput);
            System.out.println("problemConstraints :" + problemConstraints);

            //retrieve problem tags from problem page (comma separated text of the second child)
            Element elementtag = doc.getElementsByClass("problem-tags").first().child(1);
            StringTokenizer st = new StringTokenizer(elementtag.text(), ",");
            while (st.hasMoreTokens()) {
                tags.add(st.nextToken().trim());
            }

            //retrieve sample input sample output if present
            Element elementSIO = doc.getElementsByClass("input-output-container").first();
            //if sample input output is present
            if (elementSIO != null) {
                //find position of sample output
                int soutpos = elementSIO.text().indexOf("SAMPLE OUTPUT");
                //12 and 13 are the lengths of "SAMPLE INPUT" and "SAMPLE OUTPUT"
                sampleInput = elementSIO.text().substring(12, soutpos);
                sampleOutput = elementSIO.text().substring(soutpos + 13);
                System.out.println("Sample input :\n" + sampleInput + "\n\n\n");
                System.out.println("Sample Output :\n" + sampleOutput);
            } else {
                sampleInput = "";
                sampleOutput = "";
            }

            //retrieve problem explanation from problem page if present:
            //the explanation text is the sibling that follows the "explanation" heading
            Element elementExplanation = doc.getElementsByClass("standard-margin").first().child(0);
            if (elementExplanation.text().toLowerCase().contains("explanation")) {
                problemExplanation = elementExplanation.nextElementSibling().text();
            }
            System.out.println("Explanation :" + problemExplanation);

            //retrieve timelimit (first whitespace separated token of the guideline text)
            Element elementTL = doc.getElementsByClass("problem-guidelines").first().child(0).child(1);
            StringTokenizer stTL = new StringTokenizer(elementTL.ownText(), " ");
            problemTimeLimit = Double.parseDouble(stTL.nextToken());

            //store empty fields as null
            if (problemTitle.length() == 0) {
                problemTitle = null;
            }
            if (problemStatement.length() == 0) {
                problemStatement = null;
            }
            if (problemInput.length() == 0) {
                problemInput = null;
            }
            if (problemOutput.length() == 0) {
                problemOutput = null;
            }
            if (problemExplanation.length() == 0) {
                problemExplanation = null;
            }
            if (problemConstraints.length() == 0) {
                problemConstraints = null;
            }
            //set all retrieved information to problem class
            problem.setTitle(problemTitle);
            problem.setProblemUrl(problemUrl);
            problem.setProblemStatement(problemStatement);
            problem.setInputFormat(problemInput);
            problem.setOutputFormat(problemOutput);
            problem.setTimeLimit(problemTimeLimit);
            problem.setExplanation(problemExplanation);
            problem.setConstraints(problemConstraints);

            //set sample input output to problem class
            SampleInputOutput sampleInputOutput = new SampleInputOutput(problem, sampleInput, sampleOutput);
            problem.getSampleInputOutputs().add(sampleInputOutput);
            //set platform as hackerearth
            problem.setPlatform(Platform.HackerEarth);
            for (String strtag : tags) {
                problem.getTags().add(strtag);
            }

            //store in database
            Session session = null;
            Transaction transaction = null;
            try {
                //start session
                session = HibernateUtil.getSessionFactory().openSession();
                transaction = session.beginTransaction();

                //check if problem is already stored in database
                String hql = "FROM Problem p where p.problemUrl = :problem_url";
                Problem oldProblem = (Problem) session.createQuery(hql).setString("problem_url", problemUrl)
                        .uniqueResult();
                String task;

                //if problem is present in database
                if (oldProblem != null) {
                    //update the old problem: reuse its id, delete it and save the fresh copy
                    task = "updated";
                    problem.setId(oldProblem.getId());
                    session.delete(oldProblem);
                    session.flush();
                    session.save(problem);
                } else {
                    task = "saved";
                    session.save(problem);
                }

                transaction.commit();
                //log the info to console
                Logger.getLogger(HackerEarthCrawler.class.getName()).log(Level.INFO, "{0} {1}",
                        new Object[] { task, problem.getProblemUrl() });
            } catch (HibernateException ee) {
                if (transaction != null) {
                    transaction.rollback();
                }
                //pass the caught exception itself so that its stack trace is logged
                Logger.getLogger(HackerEarthCrawler.class.getName()).log(Level.SEVERE,
                        "Cannot Insert/Update problem into databse: " + problemUrl, ee);
            } finally {
                //close the session
                if (session != null) {
                    session.close();
                }
            }
        } catch (Exception ee) {
            //best effort: a page that cannot be scraped is reported and skipped
            System.out.println(ee.toString());
        }
    }

    System.out.println("\n\n\n\ntutorial urls\n\n");
    try {

        for (String tutorialurl : tutorialset) {
            Response tutorialres = Jsoup.connect(tutorialurl).execute();
            Document doc = tutorialres.parse();

            Tutorial tutorial = new Tutorial();
            tutorial.setContent(doc.getElementsByClass("tutorial").first().text());

            tutorial.setName(baseUrl);
            //strip the trailing "/tutorial/" (10 characters) and keep the last path segment as name
            tutorialurl = tutorialurl.substring(0, tutorialurl.length() - 10);
            StringTokenizer tutorialtok = new StringTokenizer(tutorialurl, "/");

            String tempstr = "";
            while (tutorialtok.hasMoreTokens()) {
                tempstr = tutorialtok.nextToken();
            }

            Session session = null;
            Transaction transaction = null;
            try {
                //start session
                session = HibernateUtil.getSessionFactory().openSession();
                transaction = session.beginTransaction();

                //check if tutorial is already stored in database
                String hql = "FROM Tutorial p where p.name = :name";
                Tutorial oldProblem = (Tutorial) session.createQuery(hql).setString("name", tempstr)
                        .uniqueResult();
                String task;

                //if tutorial is present in database
                if (oldProblem != null) {
                    //update the old tutorial: keep its name, delete it and save the fresh copy
                    task = "updated";
                    tutorial.setName(oldProblem.getName());
                    session.delete(oldProblem);
                    session.flush();
                    session.save(tutorial);
                } else {
                    task = "saved";
                    tutorial.setName(tempstr);
                    session.save(tutorial);
                }

                transaction.commit();
                //log the info to console
                Logger.getLogger(HackerEarthCrawler.class.getName()).log(Level.INFO, "{0} {1}",
                        new Object[] { task, tutorial.getName() });
            } catch (HibernateException ee) {
                if (transaction != null) {
                    transaction.rollback();
                }
                Logger.getLogger(HackerEarthCrawler.class.getName()).log(Level.SEVERE,
                        "Cannot Insert/Update problem into databse: " + tempstr, ee);
            } finally {
                //close the session
                if (session != null) {
                    session.close();
                }
            }

        }
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
}

From source file:de.geeksfactory.opacclient.apis.Zones22.java

/**
 * Loads the account overview of the given account: lent media, reservations, pending fees and
 * the expiry date of the library card.
 *
 * @param acc the account to log in with
 * @return the collected account data, or {@code null} if the login yields no document or the
 *         summary page contains no link to the list of lent media
 */
@Override
public AccountData account(Account acc)
        throws IOException, NotReachableException, JSONException, SocketException, OpacErrorException {
    Document login = login(acc);
    if (login == null)
        return null;

    AccountData res = new AccountData(acc.getId());

    String lent_link = null;
    String res_link = null;
    int lent_cnt = -1;
    int res_cnt = -1;
    // Each summary row is a name cell followed by a value cell; the value is read from the
    // element sibling that follows the name cell.
    for (Element td : login.select(
            ".AccountSummaryCounterNameCell, .AccountSummaryCounterNameCellStripe, .CAccountDetailFieldNameCellStripe, .CAccountDetailFieldNameCell")) {
        String section = td.text().trim();
        if (section.contains("Entliehene Medien")) {
            lent_link = td.select("a").attr("href");
            lent_cnt = Integer.parseInt(td.nextElementSibling().text().trim());
        } else if (section.contains("Vormerkungen")) {
            res_link = td.select("a").attr("href");
            res_cnt = Integer.parseInt(td.nextElementSibling().text().trim());
        } else if (section.contains("Kontostand")) {
            res.setPendingFees(td.nextElementSibling().text().trim());
        } else if (section.matches("Ausweis g.ltig bis")) {
            // '.' stands in for the umlaut so the match does not depend on the page encoding
            res.setValidUntil(td.nextElementSibling().text().trim());
        }
    }
    assert (lent_cnt >= 0);
    assert (res_cnt >= 0);
    if (lent_link == null)
        return null;

    String lent_html = httpGet(opac_url + "/" + lent_link.replace("utf-8?Method", "utf-8&Method"),
            getDefaultEncoding());
    Document lent_doc = Jsoup.parse(lent_html);
    List<Map<String, String>> lent = new ArrayList<Map<String, String>>();

    SimpleDateFormat sdf = new SimpleDateFormat("dd/MM/yyyy", Locale.GERMAN);
    Pattern id_pat = Pattern.compile("javascript:renewItem\\('[0-9]+','(.*)'\\)");

    for (Element table : lent_doc
            .select(".LoansBrowseItemDetailsCellStripe table, .LoansBrowseItemDetailsCell table")) {
        Map<String, String> item = new HashMap<String, String>();

        for (Element tr : table.select("tr")) {
            String desc = tr.select(".LoanBrowseFieldNameCell").text().trim();
            String value = tr.select(".LoanBrowseFieldDataCell").text().trim();
            if (desc.equals("Titel"))
                item.put(AccountData.KEY_LENT_TITLE, value);
            if (desc.equals("Verfasser"))
                item.put(AccountData.KEY_LENT_AUTHOR, value);
            if (desc.equals("Mediennummer"))
                item.put(AccountData.KEY_LENT_BARCODE, value);
            if (desc.equals("ausgeliehen in"))
                item.put(AccountData.KEY_LENT_BRANCH, value);
            if (desc.matches("F.+lligkeits.*datum")) {
                // keep only the part before the first space
                value = value.split(" ")[0];
                item.put(AccountData.KEY_LENT_DEADLINE, value);
                try {
                    item.put(AccountData.KEY_LENT_DEADLINE_TIMESTAMP,
                            String.valueOf(sdf.parse(value).getTime()));
                } catch (ParseException e) {
                    e.printStackTrace();
                }
            }
        }
        if (table.select(".button[Title~=Zum]").size() == 1) {
            // the second argument of renewItem(...) is stored as the renewal link
            Matcher matcher1 = id_pat.matcher(table.select(".button[Title~=Zum]").attr("href"));
            if (matcher1.matches()) {
                item.put(AccountData.KEY_LENT_LINK, matcher1.group(1));
            }
        }
        lent.add(item);
    }
    res.setLent(lent);
    assert (lent_cnt <= lent.size());

    List<Map<String, String>> reservations = new ArrayList<Map<String, String>>();
    // Without a "Vormerkungen" row there is no reservations link; requesting
    // opac_url + "/null" would be meaningless, so the list simply stays empty.
    if (res_link != null) {
        String res_html = httpGet(opac_url + "/" + res_link, getDefaultEncoding());
        Document res_doc = Jsoup.parse(res_html);

        // Matches the status "Gelöscht" regardless of how the umlaut was decoded
        // (and also the umlaut-less form "Gelscht").
        Pattern deleted_pat = Pattern.compile("Gel.{0,2}scht");

        for (Element table : res_doc
                .select(".MessageBrowseItemDetailsCell table, .MessageBrowseItemDetailsCellStripe table")) {
            Map<String, String> item = new HashMap<String, String>();

            for (Element tr : table.select("tr")) {
                String desc = tr.select(".MessageBrowseFieldNameCell").text().trim();
                String value = tr.select(".MessageBrowseFieldDataCell").text().trim();
                if (desc.equals("Titel"))
                    item.put(AccountData.KEY_RESERVATION_TITLE, value);
                if (desc.equals("Publikationsform"))
                    item.put(AccountData.KEY_RESERVATION_FORMAT, value);
                if (desc.equals("Liefern an"))
                    item.put(AccountData.KEY_RESERVATION_BRANCH, value);
                if (desc.equals("Status"))
                    item.put(AccountData.KEY_RESERVATION_READY, value);
            }
            // deleted reservations are not reported
            String status = item.get(AccountData.KEY_RESERVATION_READY);
            if (status != null && deleted_pat.matcher(status).matches()) {
                continue;
            }
            reservations.add(item);
        }
    }
    res.setReservations(reservations);
    assert (reservations.size() >= res_cnt);

    return res;
}

From source file:me.vertretungsplan.parser.UntisCommonParser.java

/**
 * Parses an Untis substitution schedule table
 *
 * @param table        the <code>table</code> Element from the HTML document
 * @param data         {@link SubstitutionScheduleData#getData()}
 * @param day          the {@link SubstitutionScheduleDay} where the substitutions will be stored
 * @param defaultClass the class that should be set if there is no class column in the table
 */
private void parseSubstitutionScheduleTable(Element table, JSONObject data, SubstitutionScheduleDay day,
        String defaultClass) throws JSONException, CredentialInvalidException {
    if (data.optBoolean(PARAM_CLASS_IN_EXTRA_LINE) || data.optBoolean("class_in_extra_line")) { // backwards compatibility
        for (Element element : table.select("td.inline_header")) {
            String className = getClassName(element.text(), data);
            if (isValidClass(className)) {
                Element zeile = null;
                try {
                    zeile = element.parent().nextElementSibling();
                    if (zeile.select("td") == null) {
                        zeile = zeile.nextElementSibling();
                    }
                    int skipLines = 0;
                    while (zeile != null && !zeile.select("td").attr("class").equals("list inline_header")) {
                        if (skipLines > 0) {
                            skipLines--;
                            zeile = zeile.nextElementSibling();
                            continue;
                        }

                        Substitution v = new Substitution();

                        int i = 0;
                        for (Element spalte : zeile.select("td")) {
                            String text = spalte.text();
                            if (isEmpty(text)) {
                                i++;
                                continue;
                            }

                            int skipLinesForThisColumn = 0;
                            Element nextLine = zeile.nextElementSibling();
                            boolean continueSkippingLines = true;
                            while (continueSkippingLines) {
                                if (nextLine != null && nextLine.children().size() == zeile.children().size()) {
                                    Element columnInNextLine = nextLine.child(spalte.elementSiblingIndex());
                                    if (columnInNextLine.text().replaceAll("\u00A0", "").trim()
                                            .equals(nextLine.text().replaceAll("\u00A0", "").trim())) {
                                        // Continued in the next line
                                        text += " " + columnInNextLine.text();
                                        skipLinesForThisColumn++;
                                        nextLine = nextLine.nextElementSibling();
                                    } else {
                                        continueSkippingLines = false;
                                    }
                                } else {
                                    continueSkippingLines = false;
                                }
                            }
                            if (skipLinesForThisColumn > skipLines)
                                skipLines = skipLinesForThisColumn;

                            String type = data.getJSONArray(PARAM_COLUMNS).getString(i);

                            switch (type) {
                            case "lesson":
                                v.setLesson(text);
                                break;
                            case "subject":
                                handleSubject(v, spalte);
                                break;
                            case "previousSubject":
                                v.setPreviousSubject(text);
                                break;
                            case "type":
                                v.setType(text);
                                v.setColor(colorProvider.getColor(text));
                                break;
                            case "type-entfall":
                                if (text.equals("x")) {
                                    v.setType("Entfall");
                                    v.setColor(colorProvider.getColor("Entfall"));
                                } else {
                                    v.setType("Vertretung");
                                    v.setColor(colorProvider.getColor("Vertretung"));
                                }
                                break;
                            case "room":
                                handleRoom(v, spalte);
                                break;
                            case "teacher":
                                handleTeacher(v, spalte, data);
                                break;
                            case "previousTeacher":
                                v.setPreviousTeachers(splitTeachers(text, data));
                                break;
                            case "desc":
                                v.setDesc(text);
                                break;
                            case "desc-type":
                                v.setDesc(text);
                                String recognizedType = recognizeType(text);
                                v.setType(recognizedType);
                                v.setColor(colorProvider.getColor(recognizedType));
                                break;
                            case "previousRoom":
                                v.setPreviousRoom(text);
                                break;
                            case "substitutionFrom":
                                v.setSubstitutionFrom(text);
                                break;
                            case "teacherTo":
                                v.setTeacherTo(text);
                                break;
                            case "ignore":
                                break;
                            case "date": // used by UntisSubstitutionParser
                                break;
                            default:
                                throw new IllegalArgumentException("Unknown column type: " + type);
                            }
                            i++;
                        }

                        autoDetectType(data, zeile, v);

                        v.getClasses().add(className);

                        if (v.getLesson() != null && !v.getLesson().equals("")) {
                            day.addSubstitution(v);
                        }

                        zeile = zeile.nextElementSibling();

                    }
                } catch (Throwable e) {

                    e.printStackTrace();
                }
            }
        }
    } else {
        boolean hasType = false;
        for (int i = 0; i < data.getJSONArray(PARAM_COLUMNS).length(); i++) {
            if (data.getJSONArray(PARAM_COLUMNS).getString(i).equals("type")) {
                hasType = true;
            }
        }
        int skipLines = 0;
        for (Element zeile : table.select("tr.list.odd:not(:has(td.inline_header)), "
                + "tr.list.even:not(:has(td.inline_header)), " + "tr:has(td[align=center]):gt(0)")) {
            if (skipLines > 0) {
                skipLines--;
                continue;
            }

            Substitution v = new Substitution();
            String klassen = defaultClass != null ? defaultClass : "";
            int i = 0;
            for (Element spalte : zeile.select("td")) {
                String text = spalte.text();

                String type = data.getJSONArray(PARAM_COLUMNS).getString(i);
                if (isEmpty(text) && !type.equals("type-entfall")) {
                    i++;
                    continue;
                }

                int skipLinesForThisColumn = 0;
                Element nextLine = zeile.nextElementSibling();
                boolean continueSkippingLines = true;
                while (continueSkippingLines) {
                    if (nextLine != null && nextLine.children().size() == zeile.children().size()) {
                        Element columnInNextLine = nextLine.child(spalte.elementSiblingIndex());
                        if (columnInNextLine.text().replaceAll("\u00A0", "").trim()
                                .equals(nextLine.text().replaceAll("\u00A0", "").trim())) {
                            // Continued in the next line
                            text += " " + columnInNextLine.text();
                            skipLinesForThisColumn++;
                            nextLine = nextLine.nextElementSibling();
                        } else {
                            continueSkippingLines = false;
                        }
                    } else {
                        continueSkippingLines = false;
                    }
                }
                if (skipLinesForThisColumn > skipLines)
                    skipLines = skipLinesForThisColumn;

                switch (type) {
                case "lesson":
                    v.setLesson(text);
                    break;
                case "subject":
                    handleSubject(v, spalte);
                    break;
                case "previousSubject":
                    v.setPreviousSubject(text);
                    break;
                case "type":
                    v.setType(text);
                    v.setColor(colorProvider.getColor(text));
                    break;
                case "type-entfall":
                    if (text.equals("x")) {
                        v.setType("Entfall");
                        v.setColor(colorProvider.getColor("Entfall"));
                    } else if (!hasType) {
                        v.setType("Vertretung");
                        v.setColor(colorProvider.getColor("Vertretung"));
                    }
                    break;
                case "room":
                    handleRoom(v, spalte);
                    break;
                case "previousRoom":
                    v.setPreviousRoom(text);
                    break;
                case "desc":
                    v.setDesc(text);
                    break;
                case "desc-type":
                    v.setDesc(text);
                    String recognizedType = recognizeType(text);
                    v.setType(recognizedType);
                    v.setColor(colorProvider.getColor(recognizedType));
                    break;
                case "teacher":
                    handleTeacher(v, spalte, data);
                    break;
                case "previousTeacher":
                    v.setPreviousTeachers(splitTeachers(text, data));
                    break;
                case "substitutionFrom":
                    v.setSubstitutionFrom(text);
                    break;
                case "teacherTo":
                    v.setTeacherTo(text);
                    break;
                case "class":
                    klassen = getClassName(text, data);
                    break;
                case "ignore":
                    break;
                case "date": // used by UntisSubstitutionParser
                    break;
                default:
                    throw new IllegalArgumentException("Unknown column type: " + type);
                }
                i++;
            }

            if (v.getLesson() == null || v.getLesson().equals("")) {
                continue;
            }

            autoDetectType(data, zeile, v);

            List<String> affectedClasses;

            // Detect things like "7"
            Pattern singlePattern = Pattern.compile("(\\d+)");
            Matcher singleMatcher = singlePattern.matcher(klassen);

            // Detect things like "5-12"
            Pattern rangePattern = Pattern.compile("(\\d+) ?- ?(\\d+)");
            Matcher rangeMatcher = rangePattern.matcher(klassen);

            Pattern pattern2 = Pattern.compile("^(\\d+).*");

            if (rangeMatcher.matches()) {
                affectedClasses = new ArrayList<>();
                int min = Integer.parseInt(rangeMatcher.group(1));
                int max = Integer.parseInt(rangeMatcher.group(2));
                try {
                    for (String klasse : getAllClasses()) {
                        Matcher matcher2 = pattern2.matcher(klasse);
                        if (matcher2.matches()) {
                            int num = Integer.parseInt(matcher2.group(1));
                            if (min <= num && num <= max)
                                affectedClasses.add(klasse);
                        }
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                }
            } else if (singleMatcher.matches()) {
                affectedClasses = new ArrayList<>();
                int grade = Integer.parseInt(singleMatcher.group(1));
                try {
                    for (String klasse : getAllClasses()) {
                        Matcher matcher2 = pattern2.matcher(klasse);
                        if (matcher2.matches() && grade == Integer.parseInt(matcher2.group(1))) {
                            affectedClasses.add(klasse);
                        }
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                }
            } else {
                if (data.optBoolean(PARAM_CLASSES_SEPARATED, true)
                        && data.optBoolean("classes_separated", true)) { // backwards compatibility
                    affectedClasses = Arrays.asList(klassen.split(", "));
                } else {
                    affectedClasses = new ArrayList<>();
                    try {
                        for (String klasse : getAllClasses()) { // TODO: is there a better way?
                            StringBuilder regex = new StringBuilder();
                            for (char character : klasse.toCharArray()) {
                                if (character == '?') {
                                    regex.append("\\?");
                                } else {
                                    regex.append(character);
                                }
                                regex.append(".*");
                            }
                            if (klassen.matches(regex.toString())) {
                                affectedClasses.add(klasse);
                            }
                        }
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }

            for (String klasse : affectedClasses) {
                if (isValidClass(klasse)) {
                    v.getClasses().add(klasse);
                }
            }

            if (data.optBoolean(PARAM_MERGE_WITH_DIFFERENT_TYPE, false)) {
                boolean found = false;
                for (Substitution subst : day.getSubstitutions()) {
                    if (subst.equalsExcludingType(v)) {
                        found = true;

                        if (v.getType().equals("Vertretung")) {
                            subst.setType("Vertretung");
                            subst.setColor(colorProvider.getColor("Vertretung"));
                        }

                        break;
                    }
                }
                if (!found) {
                    day.addSubstitution(v);
                }
            } else {
                day.addSubstitution(v);
            }
        }
    }
}