List of usage examples for org.jsoup.nodes Element nextElementSibling
public Element nextElementSibling()
From source file:gov.medicaid.screening.dao.impl.BBHTLicenseDAOBean.java
/** * Performs a search for all possible results. * * @param criteria The search criteria./*from ww w . j a v a2 s.co m*/ * @param byName flag indicating it is a name search * @return the search result for licenses * * @throws URISyntaxException if an error occurs while building the URL. * @throws ClientProtocolException if client does not support protocol used. * @throws IOException if an error occurs while parsing response. * @throws ParseException if an error occurs while parsing response. * @throws ServiceException for any other problems encountered */ private SearchResult<License> getAllResults(BBHTLicenseSearchCriteria criteria, boolean byName) throws URISyntaxException, ClientProtocolException, IOException, ParseException, ServiceException { DefaultHttpClient client = new DefaultHttpClient(getLaxSSLConnectionManager()); client.setRedirectStrategy(new LaxRedirectStrategy()); HttpGet getSearch = new HttpGet(new URIBuilder(getSearchURL()).build()); HttpResponse response = client.execute(getSearch); verifyAndAuditCall(getSearchURL(), response); Document page = Jsoup.parse(EntityUtils.toString(response.getEntity())); HttpPost search = new HttpPost(new URIBuilder(getSearchURL()).build()); List<License> allLicenses = new ArrayList<License>(); // switch to search by name screen if (byName) { HttpEntity entity = postForm(getSearchURL(), client, search, new String[][] { { "__EVENTTARGET", "_ctl7_rbtnSearch_1" }, { "__EVENTARGUMENT", "" }, { "_ctl7:ddlbLicenseType", "CD" }, { "_ctl7:rbtnSearch", "2" }, { "_ctl7:txtLicenseNumber", "" }, { "__VIEWSTATE", page.select("input[name=__VIEWSTATE]").first().val() } }, true); page = Jsoup.parse(EntityUtils.toString(entity)); entity = getResultPage(criteria, client, page, search, "_ctl7:cmdSearch", getSearchURL()); page = Jsoup.parse(EntityUtils.toString(entity)); // get the data grid entries if (page.select("table#_ctl7_grdSearchResults").size() < 1) { throw new ParsingException(ErrorCode.MITA50002.getDesc()); } Elements rows = 
page.select(GRID_ROW_SELECTOR); while (rows.size() > 0) { for (Element row : rows) { String url = row.select("a").first().attr("href"); String licenseNo = row.select("td:eq(5)").text(); HttpGet getDetail = new HttpGet(Util.replaceLastURLPart(getSearchURL(), url)); response = client.execute(getDetail); verifyAndAuditCall(getSearchURL(), response); Document licenseDetails = Jsoup.parse(EntityUtils.toString(response.getEntity())); allLicenses.add(parseLicense(licenseDetails, licenseNo)); } rows.clear(); // check for next page Element currentPage = page.select("#_ctl7_grdSearchResults tr.TablePager span").first(); if (getLog() != null) { getLog().log(Level.DEBUG, "Current page is: " + currentPage.text()); } Element pageLink = currentPage.nextElementSibling(); if (pageLink != null && pageLink.hasAttr("href")) { if (getLog() != null) { getLog().log(Level.DEBUG, "There are more results, getting the next page."); } String target = parseEventTarget(pageLink.attr("href")); entity = getResultPage(criteria, client, page, search, target, getSearchURL()); page = Jsoup.parse(EntityUtils.toString(entity)); rows = page.select(GRID_ROW_SELECTOR); } } } else { // search by license number (site supports only exact match) HttpEntity entity = postForm(getSearchURL(), client, search, new String[][] { { "__EVENTTARGET", "_ctl7:cmdSearch" }, { "__EVENTARGUMENT", "" }, { "_ctl7:ddlbLicenseType", Util.defaultString(criteria.getLicenseType().getName()) }, { "_ctl7:rbtnSearch", "1" }, { "_ctl7:txtLicenseNumber", Util.defaultString(criteria.getIdentifier()) }, { "__VIEWSTATE", page.select("input[name=__VIEWSTATE]").first().val() } }, true); page = Jsoup.parse(EntityUtils.toString(entity)); if (page.select("span#lblFormTitle").text().equals("License Details")) { String prefLicenseNo = criteria.getIdentifier(); allLicenses.add(parseLicense(page, prefLicenseNo)); } } SearchResult<License> searchResult = new SearchResult<License>(); searchResult.setItems(allLicenses); return searchResult; }
From source file:net.pixomania.crawler.W3C.parser.rules.editors.EditorsRule2.java
/**
 * Collects the editors listed in {@code dd} elements that follow an "Editor" or
 * "Edition Editor" {@code dt} heading.
 *
 * <p>Entries whose preceding {@code dt} is not an editor heading (or mentions a version or
 * ends with "draft:") switch the rule into skip mode until an element followed by an editor
 * heading is reached. An entry containing more than two " - " separators hands the whole
 * document over to {@link EditorsRule5}.
 *
 * @param url address of the specification; used in warning messages and passed on when delegating
 * @param doc parsed specification document
 * @return the editors found, or {@code null} when no candidate element matches or no person
 *         could be parsed
 */
@Override
public ArrayList<Person> run(String url, Document doc) {
    ArrayList<Person> editorList = new ArrayList<>();

    Elements editors = doc.select("dt:contains(Editor) ~ dd, dt:contains(Edition Editor) ~ dd");
    if (editors.size() == 0) {
        return null;
    }

    boolean skip = false;
    for (Element editor : editors) {
        Element prev = editor.previousElementSibling();
        if (prev.tagName().equals("dt")) {
            String heading = prev.text().trim().toLowerCase();
            boolean editorHeading = heading.startsWith("editor") || heading.startsWith("edition editor");
            if (!editorHeading || heading.contains("version") || heading.endsWith("draft:")) {
                skip = true;
            }
        }

        if (skip) {
            // leave skip mode when the following element announces an editor section;
            // the current entry is dropped either way
            Element following = editor.nextElementSibling();
            if (following != null) {
                String followingText = following.text().trim().toLowerCase();
                if (followingText.startsWith("editor") || followingText.contains("edition editor")) {
                    skip = false;
                }
            }
            continue;
        }

        String plain = editor.text();
        if (StringUtils.countMatches(plain, " - ") > 2) {
            Log.log("warning", "This editor may be a list of editors separated by - ");
            return new EditorsRule5().run(url, doc);
        }

        String[] splitted = editor.html().split("<br />|<br clear=\"none\" />");
        if (splitted.length < 2) {
            // single person in this element
            String plainLower = plain.toLowerCase();
            if (plainLower.startsWith("(in alphabetic") || plainLower.startsWith("see acknowl")
                    || plainLower.startsWith("the w3") || plainLower.startsWith("(see ac")
                    || plainLower.startsWith("see participants") || plainLower.contains("note:")) {
                Log.log("warning", "Spec " + url + " may refer to a different section!");
                continue;
            }
            if (plain.equals("WHATWG:") || plain.equals("W3C:")) {
                continue;
            }

            Person result = NameParser.parse(plain);
            if (result == null) {
                continue;
            }

            for (Element anchor : editor.select("a")) {
                String href = anchor.attr("href");
                if (href.isEmpty()) {
                    continue;
                }
                if (href.contains("@")) {
                    result.setEmail(href.replace("mailto:", ""));
                } else {
                    result.addWebsite(href);
                }
            }

            editorList.add(result);
        } else {
            // several persons separated by line breaks
            for (String split : splitted) {
                if (split.isEmpty()) {
                    continue;
                }

                String splitLower = split.toLowerCase();
                if (splitLower.startsWith("(in alphabetic") || splitLower.startsWith("see acknowl")
                        || splitLower.startsWith("the w3") || splitLower.startsWith("(see ac")
                        || splitLower.startsWith("see participants") || splitLower.contains("note:")) {
                    Log.log("warning", "Spec " + url + " may refer to a different section!");
                    continue;
                }
                if (split.equals("WHATWG:") || split.equals("W3C:")) {
                    continue;
                }

                Document newdoc = Jsoup.parse(split.replaceAll("\n", ""));
                Person result = NameParser.parse(newdoc.text());
                if (result == null) {
                    continue;
                }

                for (Element anchor : newdoc.select("a")) {
                    String href = anchor.attr("href");
                    if (href.isEmpty()) {
                        continue;
                    }
                    if (href.contains("@")) {
                        result.setEmail(href.replace("mailto:", ""));
                    } else {
                        result.addWebsite(href);
                    }
                }

                editorList.add(result);
            }
        }

        // a following dt starts a new definition term, so the editor entries end here
        Element next = editor.nextElementSibling();
        if (next != null && next.tag().getName().equals("dt")) {
            break;
        }
    }

    if (editorList.size() == 0) {
        return null;
    }

    return editorList;
}
From source file:net.pixomania.crawler.W3C.parser.rules.editors.EditorsRule8.java
@Override public ArrayList<Person> run(String url, Document doc) { ArrayList<Person> editorList = new ArrayList<>(); Elements editors = doc.select("h4:contains(Editor) ~ blockquote"); if (editors.size() == 0) return null; boolean skip = false; for (Element editor : editors) { Element prev = editor.previousElementSibling(); if (prev.tagName().equals("h4")) { if ((!prev.text().trim().toLowerCase().startsWith("editor") && !prev.text().trim().toLowerCase().startsWith("edition editor")) || prev.text().trim().toLowerCase().endsWith("version:") || prev.text().trim().toLowerCase().endsWith("draft:")) { skip = true;//from w w w .j a v a2 s.c o m } } if (skip) { Element next = editor.nextElementSibling(); if (next != null) { if (next.text().trim().toLowerCase().startsWith("editor") || next.text().trim().toLowerCase().contains("edition editor")) { skip = false; continue; } } continue; } if (StringUtils.countMatches(editor.text(), " - ") > 2) { Log.log("warning", "This editor may be a list of editors separated by - "); EditorsRule5 ed5 = new EditorsRule5(); return ed5.run(url, doc); } String[] splitted = editor.html().split("<br />|<br clear=\"none\" />"); if (splitted.length < 2) { if (editor.text().toLowerCase().startsWith("(in alphabetic") || editor.text().toLowerCase().startsWith("see acknowl") || editor.text().toLowerCase().startsWith("the w3") || editor.text().toLowerCase().startsWith("(see ac") || editor.text().toLowerCase().startsWith("see participants") || editor.text().toLowerCase().contains("note:")) { Log.log("warning", "Spec " + url + " may refer to a different section!"); continue; } if (editor.text().equals("WHATWG:") || editor.text().equals("W3C:")) continue; Person result = NameParser.parse(editor.text()); if (result == null) continue; for (int i = 0; i < editor.select("a").size(); i++) { if (!editor.select("a").get(i).attr("href").isEmpty()) { if (editor.select("a").get(i).attr("href").contains("@")) { 
result.setEmail(editor.select("a").get(i).attr("href").replace("mailto:", "")); } else { result.addWebsite(editor.select("a").get(i).attr("href")); } } } editorList.add(result); } else { for (String split : splitted) { if (!split.isEmpty()) { if (split.toLowerCase().startsWith("(in alphabetic") || split.toLowerCase().startsWith("see acknowl") || split.toLowerCase().startsWith("the w3") || split.toLowerCase().startsWith("(see ac") || split.toLowerCase().startsWith("see participants") || split.toLowerCase().contains("note:")) { Log.log("warning", "Spec " + url + " may refer to a different section!"); continue; } if (split.equals("WHATWG:") || split.equals("W3C:")) continue; Document newdoc = Jsoup.parse(split.replaceAll("\n", "")); Person result = NameParser.parse(newdoc.text()); if (result == null) continue; for (int i = 0; i < newdoc.select("a").size(); i++) { if (!newdoc.select("a").get(i).attr("href").isEmpty()) { if (newdoc.select("a").get(i).attr("href").contains("@")) { result.setEmail(newdoc.select("a").get(i).attr("href").replace("mailto:", "")); } else { result.addWebsite(newdoc.select("a").get(i).attr("href")); } } } editorList.add(result); } } } Element next = editor.nextElementSibling(); if (next != null) if (next.tag().getName().equals("h4")) break; } if (editorList.size() == 0) return null; return editorList; }
From source file:gov.medicaid.screening.dao.impl.NursingLicenseDAOBean.java
/** * Performs a search for all possible results. * * @param criteria The search criteria./*from ww w. j a v a2 s . c o m*/ * @param byName flag indicating it is a name search * @return the search result for licenses * * @throws URISyntaxException if an error occurs while building the URL. * @throws ClientProtocolException if client does not support protocol used. * @throws IOException if an error occurs while parsing response. * @throws ParseException if an error occurs while parsing response. * @throws ServiceException for any other problems encountered */ private SearchResult<License> getAllResults(NursingLicenseSearchCriteria criteria, boolean byName) throws URISyntaxException, ClientProtocolException, IOException, ParseException, ServiceException { DefaultHttpClient client = new DefaultHttpClient(getLaxSSLConnectionManager()); client.setRedirectStrategy(new LaxRedirectStrategy()); client.setCookieStore(loginAsPublicUser()); HttpGet getSearch = new HttpGet(new URIBuilder(getSearchURL()).build()); HttpResponse response = client.execute(getSearch); verifyAndAuditCall(getSearchURL(), response); Document page = Jsoup.parse(EntityUtils.toString(response.getEntity())); HttpPost search = new HttpPost(new URIBuilder(getSearchURL()).build()); List<License> allLicenses = new ArrayList<License>(); // switch to search by name screen if (byName) { HttpEntity entity = postForm(getSearchURL(), client, search, new String[][] { { "__EVENTTARGET", "_ctl7_rbtnSearch_1" }, { "__EVENTARGUMENT", "" }, { "_ctl7:ddlbLicenseType", "R" }, { "_ctl7:rbtnSearch", "2" }, { "_ctl7:txtCheckDigit", "" }, { "_ctl7:txtLicenseNumber", "" }, { "__VIEWSTATE", page.select("input[name=__VIEWSTATE]").first().val() } }, true); page = Jsoup.parse(EntityUtils.toString(entity)); entity = getResultPage(criteria, client, page, search, "_ctl7:cmdSearch", getSearchURL()); page = Jsoup.parse(EntityUtils.toString(entity)); // get the data grid entries if (page.select("table#_ctl7_grdSearchResults").size() < 1) 
{ throw new ParsingException(ErrorCode.MITA50002.getDesc()); } Elements rows = page.select(GRID_ROW_SELECTOR); while (rows.size() > 0) { for (Element row : rows) { String url = row.select("a").first().attr("href"); String licenseNo = row.select("td:eq(4)").text(); HttpGet getDetail = new HttpGet(Util.replaceLastURLPart(getSearchURL(), url)); response = client.execute(getDetail); verifyAndAuditCall(getSearchURL(), response); Document licenseDetails = Jsoup.parse(EntityUtils.toString(response.getEntity())); allLicenses.add(parseLicense(licenseDetails, licenseNo.substring(0, 1))); } rows.clear(); // check for next page Element currentPage = page.select("#_ctl7_grdSearchResults tr.TablePager span").first(); if (getLog() != null) { getLog().log(Level.DEBUG, "Current page is: " + currentPage.text()); } Element pageLink = currentPage.nextElementSibling(); if (pageLink != null && pageLink.hasAttr("href")) { if (getLog() != null) { getLog().log(Level.DEBUG, "There are more results, getting the next page."); } String target = parseEventTarget(pageLink.attr("href")); entity = getResultPage(criteria, client, page, search, target, getSearchURL()); page = Jsoup.parse(EntityUtils.toString(entity)); rows = page.select(GRID_ROW_SELECTOR); } } } else { // search by license number (site supports only exact match) HttpEntity entity = postForm(getSearchURL(), client, search, new String[][] { { "__EVENTTARGET", "_ctl7:cmdSearch" }, { "__EVENTARGUMENT", "" }, { "_ctl7:ddlbLicenseType", Util.defaultString(criteria.getLicenseType().getName()) }, { "_ctl7:rbtnSearch", "1" }, { "_ctl7:txtCheckDigit", Util.defaultString(criteria.getCheckDigit()) }, { "_ctl7:txtLicenseNumber", Util.defaultString(criteria.getIdentifier()) }, { "__VIEWSTATE", page.select("input[name=__VIEWSTATE]").first().val() } }, true); page = Jsoup.parse(EntityUtils.toString(entity)); if (page.select("span#lblFormTitle").text().equals("License Details")) { String prefLicenseType = criteria.getLicenseType().getName(); 
allLicenses.add(parseLicense(page, prefLicenseType)); } } SearchResult<License> searchResult = new SearchResult<License>(); searchResult.setItems(allLicenses); return searchResult; }
From source file:com.johan.vertretungsplan.parser.UntisInfoParser.java
@Override public Vertretungsplan getVertretungsplan() throws IOException, JSONException { new LoginHandler(schule).handleLogin(executor, cookieStore, username, password); Document navbarDoc = Jsoup.parse(getNavbarDoc().replace(" ", "")); Element select = navbarDoc.select("select[name=week]").first(); Vertretungsplan v = new Vertretungsplan(); List<VertretungsplanTag> tage = new ArrayList<VertretungsplanTag>(); String info = navbarDoc.select(".description").text(); String stand;//from w w w .j a va 2 s .c om try { stand = info.substring(info.indexOf("Stand:")); } catch (Exception e) { stand = ""; } for (Element option : select.children()) { String week = option.attr("value"); String letter = data.optString("letter", "w"); if (data.optBoolean("single_classes", false)) { int classNumber = 1; for (String klasse : getAllClasses()) { String paddedNumber = String.format("%05d", classNumber); String url; if (data.optBoolean("w_after_number", false)) url = baseUrl + "/" + week + "/" + letter + "/" + letter + paddedNumber + ".htm"; else url = baseUrl + "/" + letter + "/" + week + "/" + letter + paddedNumber + ".htm"; Document doc = Jsoup.parse(httpGet(url, schule.getData().getString("encoding"))); Elements days = doc.select("#vertretung > p > b, #vertretung > b"); for (Element day : days) { VertretungsplanTag tag = getTagByDatum(tage, day.text()); tag.setStand(stand); tag.setDatum(day.text()); Element next = null; if (day.parent().tagName().equals("p")) { next = day.parent().nextElementSibling().nextElementSibling(); } else next = day.parent().select("p").first().nextElementSibling(); if (next.className().equals("subst")) { //Vertretungstabelle if (next.text().contains("Vertretungen sind nicht freigegeben")) continue; parseVertretungsplanTable(next, data, tag); } else { //Nachrichten parseNachrichten(next, data, tag); next = next.nextElementSibling().nextElementSibling(); parseVertretungsplanTable(next, data, tag); } writeTagByDatum(tage, tag); } classNumber++; } } else { 
String url; if (data.optBoolean("w_after_number", false)) url = baseUrl + "/" + week + "/" + letter + "/" + letter + "00000.htm"; else url = baseUrl + "/" + letter + "/" + week + "/" + letter + "00000.htm"; Document doc = Jsoup.parse(httpGet(url, schule.getData().getString("encoding"))); Elements days = doc.select("#vertretung > p > b, #vertretung > b"); for (Element day : days) { VertretungsplanTag tag = getTagByDatum(tage, day.text()); tag.setStand(stand); tag.setDatum(day.text()); Element next = null; if (day.parent().tagName().equals("p")) { next = day.parent().nextElementSibling().nextElementSibling(); } else next = day.parent().select("p").first().nextElementSibling(); if (next.className().equals("subst")) { //Vertretungstabelle if (next.text().contains("Vertretungen sind nicht freigegeben")) continue; parseVertretungsplanTable(next, data, tag); } else { //Nachrichten parseNachrichten(next, data, tag); next = next.nextElementSibling().nextElementSibling(); parseVertretungsplanTable(next, data, tag); } tage.add(tag); } } v.setTage(tage); } return v; }
From source file:com.jimplush.goose.ContentExtractor.java
/** * alot of times the first paragraph might be the caption under an image so we'll want to make sure if we're going to * boost a parent node that it should be connected to other paragraphs, at least for the first n paragraphs * so we'll want to make sure that the next sibling is a paragraph and has at least some substatial weight to it * * * @param node//ww w.j a v a 2s.com * @return */ private boolean isOkToBoost(Element node) { int stepsAway = 0; Element sibling = node.nextElementSibling(); while (sibling != null) { if (sibling.tagName().equals("p")) { if (stepsAway >= 3) { if (logger.isDebugEnabled()) { logger.debug("Next paragraph is too far away, not boosting"); } return false; } String paraText = sibling.text(); WordStats wordStats = StopWords.getStopWordCount(paraText); if (wordStats.getStopWordCount() > 5) { if (logger.isDebugEnabled()) { logger.debug("We're gonna boost this node, seems contenty"); } return true; } } // increase how far away the next paragraph is from this node stepsAway++; sibling = sibling.nextElementSibling(); } return false; }
From source file:me.vertretungsplan.parser.DaVinciParser.java
/**
 * Parses one DaVinci page and adds its content to the given schedule.
 *
 * <p>The page title is either a date or a class name. If it is a class name, the date is
 * taken from the element following the title; when no date is found there the page is treated
 * as covering multiple days: no day object is created, messages are not collected, and the
 * last-change time is stored on the schedule instead of on a day.
 *
 * @param doc root element of the page
 * @param schedule schedule that receives the parsed day, table entries and last-change time
 * @throws IOException declared for table parsing; see {@code parseDaVinciTable}
 */
@NotNull
void parsePage(Element doc, SubstitutionSchedule schedule) throws IOException {
    SubstitutionScheduleDay day = new SubstitutionScheduleDay();

    Element titleElem;
    if (doc.select("h1.list-table-caption").size() > 0) {
        titleElem = doc.select("h1.list-table-caption").first();
    } else {
        // DaVinci 5
        titleElem = doc.select("h2").first();
    }
    String title = titleElem.text();

    String klasse = null;
    // title can either be date or class
    Pattern datePattern = Pattern.compile("\\d+\\.\\d+.\\d{4}");
    Matcher dateMatcher = datePattern.matcher(title);
    if (dateMatcher.find()) {
        day.setDateString(dateMatcher.group());
        day.setDate(ParserUtils.parseDate(dateMatcher.group()));
    } else {
        klasse = title;
        // a title without a following element simply means no date is available
        Element afterTitle = titleElem.nextElementSibling();
        String nextText = afterTitle != null ? afterTitle.text() : "";
        if (nextText.matches("\\w+ \\d+\\.\\d+.\\d{4}")) {
            day.setDateString(nextText);
            day.setDate(ParserUtils.parseDate(nextText));
        } else {
            // could not find date, must be multiple days
            day = null;
        }
    }

    for (Element p : doc.select(".row:has(h1.list-table-caption) p")) {
        for (TextNode node : p.textNodes()) {
            if (!node.text().trim().isEmpty() && day != null) {
                day.addMessage(node.text().trim());
            }
        }
    }

    for (Element message : doc.select(".callout")) {
        for (TextNode node : message.textNodes()) {
            // day is null for multi-day pages; guard like the loop above instead of failing
            if (!node.text().trim().isEmpty() && day != null) {
                day.addMessage(node.text().trim());
            }
        }
    }

    Element lastChangeElem = doc.select(".row.copyright div").first();
    if (lastChangeElem == null) {
        // DaVinci 5
        lastChangeElem = doc.select("h1").first();
    }
    // without any candidate element neither pattern matches and no last-change time is set
    String lastChange = lastChangeElem != null ? lastChangeElem.ownText() : "";

    Pattern pattern = Pattern.compile("(\\d{2}-\\d{2}-\\d{4} \\d{2}:\\d{2}) \\|");
    Matcher matcher = pattern.matcher(lastChange);
    if (matcher.find()) {
        LocalDateTime lastChangeTime = DateTimeFormat.forPattern("dd-MM-yyyy HH:mm")
                .parseLocalDateTime(matcher.group(1));
        if (day != null) {
            day.setLastChange(lastChangeTime);
        } else {
            schedule.setLastChange(lastChangeTime);
        }
    } else {
        // alternative layout: date and time separated by " | "
        Pattern pattern2 = Pattern.compile("(\\d{2}.\\d{2}.\\d{4} \\| \\d+:\\d{2})");
        Matcher matcher2 = pattern2.matcher(lastChange);
        if (matcher2.find()) {
            LocalDateTime lastChangeTime = DateTimeFormat.forPattern("dd.MM.yyyy | HH:mm")
                    .parseLocalDateTime(matcher2.group(1));
            if (day != null) {
                day.setLastChange(lastChangeTime);
            } else {
                schedule.setLastChange(lastChangeTime);
            }
        }
    }

    if (doc.select(".list-table").size() > 0 || !doc.select(".callout").text().contains("Es liegen keine")) {
        Element table = doc.select(".list-table, table").first();
        parseDaVinciTable(table, schedule, klasse, day, colorProvider);
    }

    if (day != null) {
        schedule.addDay(day);
    }
}
From source file:crawler.HackerEarthCrawler.java
@Override public void crawl() { int flag = 0; //set of urls which should be crawled TreeSet<String> linksset = new TreeSet<String>(); TreeSet<String> tempset = new TreeSet<String>(); TreeSet<String> tutorialset = new TreeSet<String>(); //final set of problem urls TreeSet<String> problemset = new TreeSet<String>(); //visited for maintaing status of if url is already crawled or not TreeMap<String, Integer> visited = new TreeMap<String, Integer>(); //add base url linksset.add(baseUrl);/*from w ww . jav a2 s. c om*/ //mark base url as not crawled visited.put(baseUrl, 0); try { while (true) { flag = 0; tempset.clear(); for (String str : linksset) { //check if url is already crawled or not and it has valid domain name if ((visited.get(str) == 0) && (str.startsWith("https://www.hackerearth.com/"))) { System.out.println("crawling " + str); //retriving response of current url as document Document doc = Jsoup.connect(str).timeout(0).userAgent( "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0") .referrer("http://www.google.com").ignoreHttpErrors(true).get(); //retriving all urls from current page Elements links = doc.select("a[href]"); //mark url as crawled visited.put(str, 1); //mark flag as url is crawled flag = 1; //retrive all urls for (Element link : links) { if (link.absUrl("href").endsWith("/tutorial/")) { tutorialset.add(link.absUrl("href")); } //check if url is problem url then add it in problemurlset if (link.absUrl("href").startsWith("https://www.hackerearth.com/") && isProblemUrl(link.absUrl("href"))) { problemset.add(link.absUrl("href")); } //check if url has valid domain and it has problem urls or not if (link.absUrl("href").contains(("https://www.hackerearth.com/")) && isCrawlable(link.absUrl("href"))) { //if link is not visited then mark it as uncrawled if (!visited.containsKey(link.absUrl("href"))) { visited.put(link.absUrl("href"), 0); } //add it in tempsetorary set tempset.add(link.absUrl("href")); //System.out.println("\n 
base: "+str+" ::: link : " + link.absUrl("href")); } } } } //if nothing is left to crawl break the loop if (flag == 0) { break; } //add all retrieved links to linksset linksset.addAll(tempset); } System.out.println("\n\ntotal problem urls " + problemset.size()); int i = 0; for (String str : problemset) { System.out.println("link " + i + " : " + str); i++; } } catch (IOException ex) { Logger.getLogger(HackerEarthCrawler.class.getName()).log(Level.SEVERE, null, ex); } //scrap and store into database //for every problem url scrap problem page for (String problemUrl : problemset) { System.out.println("problemUrl :" + problemUrl); try { //create problem class to store in database Problem problem = new Problem(); String problemSIOC = "", problemIOC = ""; String problemTitle = "", problemStatement = "", problemInput = "", problemOutput = "", problemConstraints = ""; String sampleInput = "", sampleOutput = ""; String problemExplanation = ""; //set default timelimit to 1 second double problemTimeLimit = 1.0; ArrayList<String> tags = new ArrayList<String>(); //get response for given problem url Response response = Jsoup.connect(problemUrl).execute(); Document doc = response.parse(); //retrieve problem title from page Element elementTitle = doc.getElementsByTag("title").first(); StringTokenizer stTitle = new StringTokenizer(elementTitle.text(), "|"); problemTitle = stTitle.nextToken().trim(); Element content = doc.getElementsByClass("starwars-lab").first(); problemSIOC = content.text(); Elements e = content.children(); //to find problem statement String breakloop[] = { "input", "input:", "input :", "input format:", "input format :", "input format", "Input and output", "constraints :", "constraints:", "constraints", "$$Input :$$" }; flag = 0; for (Element p : e) { String tempStatement = ""; for (Element pp : p.getAllElements()) { for (String strbreak : breakloop) { if (StringUtils.equalsIgnoreCase(pp.ownText(), strbreak)) { //System.out.println("strbreak :"+strbreak); 
tempStatement = p.text().substring(0, p.text().toLowerCase().indexOf(strbreak.toLowerCase())); // System.out.println("temp "+tempStatement); flag = 1; break; } } } if (flag == 1) { problemStatement += tempStatement; //remove extra space at end if (tempStatement.length() == 0) { problemStatement = problemStatement.substring(0, problemStatement.length() - 1); } break; } problemStatement += p.text() + " "; } System.out.println("problemSIOC :" + problemSIOC); System.out.println("problemStatement :" + problemStatement); if (problemStatement.length() <= problemSIOC.length()) { //remove problem statement from whole text and remove extra spaces at the beginning and the end problemIOC = problemSIOC.substring(problemStatement.length()).trim(); } else { problemIOC = ""; } System.out.println("problemIOC :" + problemIOC); //keywords for identifying input String decideInput[] = { "Input format :", "Input format:", "Input format", "inputformat:", "inputformat :", "inputformat", "input and output", "input :", "input:", "input" }; //keywords for identifying output String decideOutput[] = { "output format :", "output format:", "Output format", "outputformat:", "outputformat :", "outputformat", "output :", "output:", "output" }; //keywords for identifying constraint String decideConstraint[] = { "constraints:", "constraints :", "constraints", "Constraints :", "constraint:", "constraint :", "constraint", "Contraints :" }; int posin = 0, posoutput = 0, poscon = 0, idxin, idxout, idxcon, flaginput = 0, flagoutput = 0, flagcon = 0, inlen = 0, outlen = 0, conlen = 0; //find inputformat position,length of keyword for (idxin = 0; idxin < decideInput.length; idxin++) { if (StringUtils.containsIgnoreCase(problemIOC, decideInput[idxin])) { posin = problemIOC.toLowerCase().indexOf(decideInput[idxin].toLowerCase()); flaginput = 1; inlen = decideInput[idxin].length(); //decide it is keyowrd for actucal input or it is "sample input" if (StringUtils.containsIgnoreCase(problemIOC, "sample input")) { 
if (posin > problemIOC.toLowerCase().indexOf("sample input")) { flaginput = 0; inlen = 0; } else { break; } } else { break; } } } //find outputformat position,length of keyword for (idxout = 0; idxout < decideOutput.length; idxout++) { if (StringUtils.containsIgnoreCase(problemIOC, decideOutput[idxout])) { posoutput = problemIOC.toLowerCase().indexOf(decideOutput[idxout].toLowerCase()); flagoutput = 1; outlen = decideOutput[idxout].length(); break; } } //find constraint position,length of keyword for (idxcon = 0; idxcon < decideConstraint.length; idxcon++) { if (StringUtils.containsIgnoreCase(problemIOC, decideConstraint[idxcon])) { poscon = problemIOC.toLowerCase().indexOf(decideConstraint[idxcon].toLowerCase()); flagcon = 1; conlen = decideConstraint[idxcon].length(); break; } } System.out.println("input " + flaginput + " " + inlen + " " + posin); System.out.println("output " + flagoutput + " " + outlen + " " + posoutput); System.out.println("constraint " + flagcon + " " + conlen + " " + poscon); //retrieve problem input and output if present in problem page //if input format is present if (flaginput == 1) { //if input keyword is "input and output" and contraint is present in problem page if (idxin == 6 && flagcon == 1) { problemInput = problemIOC.substring(inlen, poscon); } //if input keyword is "input and output" and contraint is not present in problem page else if (idxin == 6 && flagcon == 0) { problemInput = problemIOC.substring(inlen); } //if output format and constraint is present else if (flagoutput == 1 && flagcon == 1) { //if constraint is present before input format if (poscon < posin) { problemInput = problemIOC.substring(posin + inlen, posoutput); problemOutput = problemIOC.substring(posoutput + outlen); } //if constraint is present before sample else if (poscon < posoutput) { problemInput = problemIOC.substring(inlen, poscon); problemOutput = problemIOC.substring(posoutput + outlen); } else { problemInput = problemIOC.substring(inlen, posoutput); 
problemOutput = problemIOC.substring(posoutput + outlen, poscon); } } //if constraint is not present else if (flagoutput == 1 && flagcon == 0) { problemInput = problemIOC.substring(inlen, posoutput); problemOutput = problemIOC.substring(posoutput + outlen); } else if (flagoutput == 0 && flagcon == 1) { if (poscon < posin) { problemInput = problemIOC.substring(posin + inlen); } else { problemInput = problemIOC.substring(poscon + conlen, posin); } problemOutput = ""; } else { problemInput = problemIOC.substring(inlen); problemOutput = ""; } } //if input format and output format is not present else { problemInput = ""; problemOutput = ""; } //if constraint is present if (flagcon == 1) { //if constraint is present before input format if (poscon < posin) { problemConstraints = problemIOC.substring(0, posin); } //if constraint is present before output format else if (poscon < posoutput) { problemConstraints = problemIOC.substring(poscon + conlen, posoutput); } else { problemConstraints = problemIOC.substring(poscon + conlen); } } System.out.println("problemInput :" + problemInput); System.out.println("problemOutput :" + problemOutput); System.out.println("problemConstraints :" + problemConstraints); //retrieve problem tags from problem page Element elementtag = doc.getElementsByClass("problem-tags").first().child(1); StringTokenizer st = new StringTokenizer(elementtag.text(), ","); while (st.hasMoreTokens()) { tags.add(st.nextToken().trim()); } //retrieve sample input sample output if present Element elementSIO = doc.getElementsByClass("input-output-container").first(); //if sample input output is present if (elementSIO != null) { //find position of sample output int soutpos = elementSIO.text().indexOf("SAMPLE OUTPUT"); sampleInput = elementSIO.text().substring(12, soutpos); sampleOutput = elementSIO.text().substring(soutpos + 13); System.out.println("Sample input :\n" + sampleInput + "\n\n\n"); System.out.println("Sample Output :\n" + sampleOutput); } else { sampleInput 
= ""; sampleOutput = ""; } //retrieve problem explanation from problem page if present Element elementExplanation = doc.getElementsByClass("standard-margin").first().child(0); if (elementExplanation.text().toLowerCase().contains("explanation")) { problemExplanation = elementExplanation.nextElementSibling().text(); } System.out.println("Explanation :" + problemExplanation); //retrieve timelimit Element elementTL = doc.getElementsByClass("problem-guidelines").first().child(0).child(1); StringTokenizer stTL = new StringTokenizer(elementTL.ownText(), " "); problemTimeLimit = Double.parseDouble(stTL.nextToken()); //System.out.println("problemTimeLimit :"+problemTimeLimit); //set all retrieved information to problem class problem.setProblemUrl(problemUrl); if (problemTitle.length() == 0) { problemTitle = null; } if (problemStatement.length() == 0) { problemStatement = null; } if (problemInput.length() == 0) { problemInput = null; } if (problemOutput.length() == 0) { problemOutput = null; } if (problemExplanation.length() == 0) { problemExplanation = null; } if (problemConstraints.length() == 0) { problemConstraints = null; } problem.setTitle(problemTitle); problem.setProblemUrl(problemUrl); problem.setProblemStatement(problemStatement); problem.setInputFormat(problemInput); problem.setOutputFormat(problemOutput); problem.setTimeLimit(problemTimeLimit); problem.setExplanation(problemExplanation); problem.setConstraints(problemConstraints); //set sample input output to problem class SampleInputOutput sampleInputOutput = new SampleInputOutput(problem, sampleInput, sampleOutput); problem.getSampleInputOutputs().add(sampleInputOutput); //set platform as hackerearth problem.setPlatform(Platform.HackerEarth); for (String strtag : tags) { problem.getTags().add(strtag); } //store in database Session session = null; Transaction transaction = null; try { //start session session = HibernateUtil.getSessionFactory().openSession(); transaction = session.beginTransaction(); //check if 
problem is already stored in database String hql = "FROM Problem p where p.problemUrl = :problem_url"; Problem oldProblem = (Problem) session.createQuery(hql).setString("problem_url", problemUrl) .uniqueResult(); String task; //if problem is present in database if (oldProblem != null) { //update the old problem task = "updated"; //retrieve id of old problem problem.setId(oldProblem.getId()); session.delete(oldProblem); session.flush(); session.save(problem); } else { task = "saved"; session.save(problem); } transaction.commit(); //log the info to console Logger.getLogger(CodeForcesCrawler.class.getName()).log(Level.INFO, "{0} {1}", new Object[] { task, problem.getProblemUrl() }); } catch (HibernateException ee) { if (transaction != null) { transaction.rollback(); } Logger.getLogger(CodeForcesCrawler.class.getName()).log(Level.SEVERE, "Cannot Insert/Update problem into databse: " + problemUrl, e); } finally { //close the session if (session != null) { session.close(); } } } catch (Exception ee) { System.out.println(ee.toString()); } } System.out.println("\n\n\n\ntutorial urls\n\n"); try { for (String tutorialurl : tutorialset) { //System.out.println(tutorialurl+"\n\n"); Response tutorialres = Jsoup.connect(tutorialurl).execute(); Document doc = tutorialres.parse(); Tutorial tutorial = new Tutorial(); tutorial.setContent(doc.getElementsByClass("tutorial").first().text()); tutorial.setName(baseUrl); tutorialurl = tutorialurl.substring(0, tutorialurl.length() - 10); StringTokenizer tutorialtok = new StringTokenizer(tutorialurl, "/"); String tempstr = ""; while (tutorialtok.hasMoreTokens()) { tempstr = tutorialtok.nextToken(); } Session session = null; Transaction transaction = null; try { //start session session = HibernateUtil.getSessionFactory().openSession(); transaction = session.beginTransaction(); //check if problem is already stored in database String hql = "FROM Tutorial p where p.name = :name"; Tutorial oldProblem = (Tutorial) 
session.createQuery(hql).setString("name", tempstr) .uniqueResult(); String task; //if problem is present in database if (oldProblem != null) { //update the old problem task = "updated"; //retrieve id of old problem tutorial.setName(oldProblem.getName()); session.delete(oldProblem); session.flush(); session.save(tutorial); } else { task = "saved"; tutorial.setName(tempstr); session.save(tutorial); } transaction.commit(); //log the info to console Logger.getLogger(CodeForcesCrawler.class.getName()).log(Level.INFO, "{0} {1}", new Object[] { task, tutorial.getName() }); } catch (HibernateException ee) { if (transaction != null) { transaction.rollback(); } Logger.getLogger(CodeForcesCrawler.class.getName()).log(Level.SEVERE, "Cannot Insert/Update problem into databse: " + tempstr, ee); } finally { //close the session if (session != null) { session.close(); } } } } catch (Exception e) { System.out.println(e.getMessage()); } }
From source file:de.geeksfactory.opacclient.apis.Zones22.java
@Override public AccountData account(Account acc) throws IOException, NotReachableException, JSONException, SocketException, OpacErrorException { Document login = login(acc);//from w w w .j a v a 2 s.c o m if (login == null) return null; AccountData res = new AccountData(acc.getId()); String lent_link = null; String res_link = null; int lent_cnt = -1; int res_cnt = -1; for (Element td : login.select( ".AccountSummaryCounterNameCell, .AccountSummaryCounterNameCellStripe, .CAccountDetailFieldNameCellStripe, .CAccountDetailFieldNameCell")) { String section = td.text().trim(); if (section.contains("Entliehene Medien")) { lent_link = td.select("a").attr("href"); lent_cnt = Integer.parseInt(td.nextElementSibling().text().trim()); } else if (section.contains("Vormerkungen")) { res_link = td.select("a").attr("href"); res_cnt = Integer.parseInt(td.nextElementSibling().text().trim()); } else if (section.contains("Kontostand")) { res.setPendingFees(td.nextElementSibling().text().trim()); } else if (section.matches("Ausweis g.ltig bis")) { res.setValidUntil(td.nextElementSibling().text().trim()); } } assert (lent_cnt >= 0); assert (res_cnt >= 0); if (lent_link == null) return null; String lent_html = httpGet(opac_url + "/" + lent_link.replace("utf-8?Method", "utf-8&Method"), getDefaultEncoding()); Document lent_doc = Jsoup.parse(lent_html); List<Map<String, String>> lent = new ArrayList<Map<String, String>>(); SimpleDateFormat sdf = new SimpleDateFormat("dd/MM/yyyy", Locale.GERMAN); Pattern id_pat = Pattern.compile("javascript:renewItem\\('[0-9]+','(.*)'\\)"); for (Element table : lent_doc .select(".LoansBrowseItemDetailsCellStripe table, .LoansBrowseItemDetailsCell table")) { Map<String, String> item = new HashMap<String, String>(); for (Element tr : table.select("tr")) { String desc = tr.select(".LoanBrowseFieldNameCell").text().trim(); String value = tr.select(".LoanBrowseFieldDataCell").text().trim(); if (desc.equals("Titel")) item.put(AccountData.KEY_LENT_TITLE, value); 
if (desc.equals("Verfasser")) item.put(AccountData.KEY_LENT_AUTHOR, value); if (desc.equals("Mediennummer")) item.put(AccountData.KEY_LENT_BARCODE, value); if (desc.equals("ausgeliehen in")) item.put(AccountData.KEY_LENT_BRANCH, value); if (desc.matches("F.+lligkeits.*datum")) { value = value.split(" ")[0]; item.put(AccountData.KEY_LENT_DEADLINE, value); try { item.put(AccountData.KEY_LENT_DEADLINE_TIMESTAMP, String.valueOf(sdf.parse(value).getTime())); } catch (ParseException e) { e.printStackTrace(); } } } if (table.select(".button[Title~=Zum]").size() == 1) { Matcher matcher1 = id_pat.matcher(table.select(".button[Title~=Zum]").attr("href")); if (matcher1.matches()) { item.put(AccountData.KEY_LENT_LINK, matcher1.group(1)); } } lent.add(item); } res.setLent(lent); assert (lent_cnt <= lent.size()); List<Map<String, String>> reservations = new ArrayList<Map<String, String>>(); String res_html = httpGet(opac_url + "/" + res_link, getDefaultEncoding()); Document res_doc = Jsoup.parse(res_html); for (Element table : res_doc .select(".MessageBrowseItemDetailsCell table, .MessageBrowseItemDetailsCellStripe table")) { Map<String, String> item = new HashMap<String, String>(); for (Element tr : table.select("tr")) { String desc = tr.select(".MessageBrowseFieldNameCell").text().trim(); String value = tr.select(".MessageBrowseFieldDataCell").text().trim(); if (desc.equals("Titel")) item.put(AccountData.KEY_RESERVATION_TITLE, value); if (desc.equals("Publikationsform")) item.put(AccountData.KEY_RESERVATION_FORMAT, value); if (desc.equals("Liefern an")) item.put(AccountData.KEY_RESERVATION_BRANCH, value); if (desc.equals("Status")) item.put(AccountData.KEY_RESERVATION_READY, value); } if ("Gelscht".equals(item.get(AccountData.KEY_RESERVATION_READY))) { continue; } reservations.add(item); } res.setReservations(reservations); assert (reservations.size() >= res_cnt); return res; }
From source file:me.vertretungsplan.parser.UntisCommonParser.java
/** * Parses an Untis substitution schedule table * * @param table the <code>table</code> Element from the HTML document * @param data {@link SubstitutionScheduleData#getData()} * @param day the {@link SubstitutionScheduleDay} where the substitutions will be stored * @param defaultClass the class that should be set if there is no class column in the table *///from ww w.j a v a 2 s . c o m private void parseSubstitutionScheduleTable(Element table, JSONObject data, SubstitutionScheduleDay day, String defaultClass) throws JSONException, CredentialInvalidException { if (data.optBoolean(PARAM_CLASS_IN_EXTRA_LINE) || data.optBoolean("class_in_extra_line")) { // backwards compatibility for (Element element : table.select("td.inline_header")) { String className = getClassName(element.text(), data); if (isValidClass(className)) { Element zeile = null; try { zeile = element.parent().nextElementSibling(); if (zeile.select("td") == null) { zeile = zeile.nextElementSibling(); } int skipLines = 0; while (zeile != null && !zeile.select("td").attr("class").equals("list inline_header")) { if (skipLines > 0) { skipLines--; zeile = zeile.nextElementSibling(); continue; } Substitution v = new Substitution(); int i = 0; for (Element spalte : zeile.select("td")) { String text = spalte.text(); if (isEmpty(text)) { i++; continue; } int skipLinesForThisColumn = 0; Element nextLine = zeile.nextElementSibling(); boolean continueSkippingLines = true; while (continueSkippingLines) { if (nextLine != null && nextLine.children().size() == zeile.children().size()) { Element columnInNextLine = nextLine.child(spalte.elementSiblingIndex()); if (columnInNextLine.text().replaceAll("\u00A0", "").trim() .equals(nextLine.text().replaceAll("\u00A0", "").trim())) { // Continued in the next line text += " " + columnInNextLine.text(); skipLinesForThisColumn++; nextLine = nextLine.nextElementSibling(); } else { continueSkippingLines = false; } } else { continueSkippingLines = false; } } if 
(skipLinesForThisColumn > skipLines) skipLines = skipLinesForThisColumn; String type = data.getJSONArray(PARAM_COLUMNS).getString(i); switch (type) { case "lesson": v.setLesson(text); break; case "subject": handleSubject(v, spalte); break; case "previousSubject": v.setPreviousSubject(text); break; case "type": v.setType(text); v.setColor(colorProvider.getColor(text)); break; case "type-entfall": if (text.equals("x")) { v.setType("Entfall"); v.setColor(colorProvider.getColor("Entfall")); } else { v.setType("Vertretung"); v.setColor(colorProvider.getColor("Vertretung")); } break; case "room": handleRoom(v, spalte); break; case "teacher": handleTeacher(v, spalte, data); break; case "previousTeacher": v.setPreviousTeachers(splitTeachers(text, data)); break; case "desc": v.setDesc(text); break; case "desc-type": v.setDesc(text); String recognizedType = recognizeType(text); v.setType(recognizedType); v.setColor(colorProvider.getColor(recognizedType)); break; case "previousRoom": v.setPreviousRoom(text); break; case "substitutionFrom": v.setSubstitutionFrom(text); break; case "teacherTo": v.setTeacherTo(text); break; case "ignore": break; case "date": // used by UntisSubstitutionParser break; default: throw new IllegalArgumentException("Unknown column type: " + type); } i++; } autoDetectType(data, zeile, v); v.getClasses().add(className); if (v.getLesson() != null && !v.getLesson().equals("")) { day.addSubstitution(v); } zeile = zeile.nextElementSibling(); } } catch (Throwable e) { e.printStackTrace(); } } } } else { boolean hasType = false; for (int i = 0; i < data.getJSONArray(PARAM_COLUMNS).length(); i++) { if (data.getJSONArray(PARAM_COLUMNS).getString(i).equals("type")) { hasType = true; } } int skipLines = 0; for (Element zeile : table.select("tr.list.odd:not(:has(td.inline_header)), " + "tr.list.even:not(:has(td.inline_header)), " + "tr:has(td[align=center]):gt(0)")) { if (skipLines > 0) { skipLines--; continue; } Substitution v = new Substitution(); String klassen 
= defaultClass != null ? defaultClass : ""; int i = 0; for (Element spalte : zeile.select("td")) { String text = spalte.text(); String type = data.getJSONArray(PARAM_COLUMNS).getString(i); if (isEmpty(text) && !type.equals("type-entfall")) { i++; continue; } int skipLinesForThisColumn = 0; Element nextLine = zeile.nextElementSibling(); boolean continueSkippingLines = true; while (continueSkippingLines) { if (nextLine != null && nextLine.children().size() == zeile.children().size()) { Element columnInNextLine = nextLine.child(spalte.elementSiblingIndex()); if (columnInNextLine.text().replaceAll("\u00A0", "").trim() .equals(nextLine.text().replaceAll("\u00A0", "").trim())) { // Continued in the next line text += " " + columnInNextLine.text(); skipLinesForThisColumn++; nextLine = nextLine.nextElementSibling(); } else { continueSkippingLines = false; } } else { continueSkippingLines = false; } } if (skipLinesForThisColumn > skipLines) skipLines = skipLinesForThisColumn; switch (type) { case "lesson": v.setLesson(text); break; case "subject": handleSubject(v, spalte); break; case "previousSubject": v.setPreviousSubject(text); break; case "type": v.setType(text); v.setColor(colorProvider.getColor(text)); break; case "type-entfall": if (text.equals("x")) { v.setType("Entfall"); v.setColor(colorProvider.getColor("Entfall")); } else if (!hasType) { v.setType("Vertretung"); v.setColor(colorProvider.getColor("Vertretung")); } break; case "room": handleRoom(v, spalte); break; case "previousRoom": v.setPreviousRoom(text); break; case "desc": v.setDesc(text); break; case "desc-type": v.setDesc(text); String recognizedType = recognizeType(text); v.setType(recognizedType); v.setColor(colorProvider.getColor(recognizedType)); break; case "teacher": handleTeacher(v, spalte, data); break; case "previousTeacher": v.setPreviousTeachers(splitTeachers(text, data)); break; case "substitutionFrom": v.setSubstitutionFrom(text); break; case "teacherTo": v.setTeacherTo(text); break; case 
"class": klassen = getClassName(text, data); break; case "ignore": break; case "date": // used by UntisSubstitutionParser break; default: throw new IllegalArgumentException("Unknown column type: " + type); } i++; } if (v.getLesson() == null || v.getLesson().equals("")) { continue; } autoDetectType(data, zeile, v); List<String> affectedClasses; // Detect things like "7" Pattern singlePattern = Pattern.compile("(\\d+)"); Matcher singleMatcher = singlePattern.matcher(klassen); // Detect things like "5-12" Pattern rangePattern = Pattern.compile("(\\d+) ?- ?(\\d+)"); Matcher rangeMatcher = rangePattern.matcher(klassen); Pattern pattern2 = Pattern.compile("^(\\d+).*"); if (rangeMatcher.matches()) { affectedClasses = new ArrayList<>(); int min = Integer.parseInt(rangeMatcher.group(1)); int max = Integer.parseInt(rangeMatcher.group(2)); try { for (String klasse : getAllClasses()) { Matcher matcher2 = pattern2.matcher(klasse); if (matcher2.matches()) { int num = Integer.parseInt(matcher2.group(1)); if (min <= num && num <= max) affectedClasses.add(klasse); } } } catch (IOException e) { e.printStackTrace(); } } else if (singleMatcher.matches()) { affectedClasses = new ArrayList<>(); int grade = Integer.parseInt(singleMatcher.group(1)); try { for (String klasse : getAllClasses()) { Matcher matcher2 = pattern2.matcher(klasse); if (matcher2.matches() && grade == Integer.parseInt(matcher2.group(1))) { affectedClasses.add(klasse); } } } catch (IOException e) { e.printStackTrace(); } } else { if (data.optBoolean(PARAM_CLASSES_SEPARATED, true) && data.optBoolean("classes_separated", true)) { // backwards compatibility affectedClasses = Arrays.asList(klassen.split(", ")); } else { affectedClasses = new ArrayList<>(); try { for (String klasse : getAllClasses()) { // TODO: is there a better way? 
StringBuilder regex = new StringBuilder(); for (char character : klasse.toCharArray()) { if (character == '?') { regex.append("\\?"); } else { regex.append(character); } regex.append(".*"); } if (klassen.matches(regex.toString())) { affectedClasses.add(klasse); } } } catch (IOException e) { e.printStackTrace(); } } } for (String klasse : affectedClasses) { if (isValidClass(klasse)) { v.getClasses().add(klasse); } } if (data.optBoolean(PARAM_MERGE_WITH_DIFFERENT_TYPE, false)) { boolean found = false; for (Substitution subst : day.getSubstitutions()) { if (subst.equalsExcludingType(v)) { found = true; if (v.getType().equals("Vertretung")) { subst.setType("Vertretung"); subst.setColor(colorProvider.getColor("Vertretung")); } break; } } if (!found) { day.addSubstitution(v); } } else { day.addSubstitution(v); } } } }