List of usage examples for org.jsoup.nodes.Element.ownText()
public String ownText()
From source file:com.glluch.profilesparser.ProfileHtmlReader.java
private ArrayList<String> ul2array(Element list) { Elements llist = list.select("li"); ArrayList<String> l = new ArrayList<>(); for (Element li : llist) { l.add(li.ownText()); }// w ww.jav a 2 s . c om return l; }
From source file:de.fzi.ALERT.actor.MessageObserver.NotificationObserver.JMSNotificationParser.java
private void parseIssueAlertIusse(Document doc) { // TODO Auto-generated method stub Message message = null;// w ww . j ava 2 s. c o m RssFeedGenerator newRssFeedGenerator = new RssFeedGenerator(); try { String content = "EventName: AlertIusse\n"; message = new Message(); Elements events = doc.select("ns1|eventData"); org.jsoup.nodes.Element event = events.get(0); Elements elements = event.getAllElements(); for (org.jsoup.nodes.Element element : elements) { content = content + element.tagName() + " : " + element.ownText() + "\n"; } System.out.println("content:" + content); newRssFeedGenerator.RssFeedXml("title", "Link", content); System.out.println("!!!"); } catch (Exception e) { System.out.println(e.getMessage()); } }
From source file:de.fzi.ALERT.actor.MessageObserver.NotificationObserver.JMSNotificationParser.java
private Message parseIdentityRecommendation(Document doc) { Message message = null;/* w w w .j ava 2 s .c om*/ try { Elements pID = doc.select("p|patternId"); String patternId = pID.get(0).text(); // String message = doc.select(query); Pattern pattern; pattern = patternDAO.findById(patternId); String content = "EventName: IdentityRecommendation\n"; if (pattern != null) { message = new Message(); message.setPatternId(pattern); Elements events = doc.select("ns1|eventData"); org.jsoup.nodes.Element event = events.get(0); Elements elements = event.getAllElements(); for (org.jsoup.nodes.Element element : elements) { content = content + element.tagName() + " : " + element.ownText() + "\n"; } message.setSubject("Identity Recommendation"); message.setSummary("default summary"); message.setContent(content); message.setMsgDate(new Date()); message.setMsgID(1); } else System.out.println("can't find patternID of the complex event:" + patternId); } catch (Exception e) { System.out.println(e.getMessage()); } return message; }
From source file:de.fzi.ALERT.actor.MessageObserver.NotificationObserver.JMSNotificationParser.java
public Message parseIdentityVerification(Document doc) { Message message = null;/*from ww w . j ava 2 s.c o m*/ try { Elements pID = doc.select("p|patternId"); String patternId = pID.get(0).text(); // String message = doc.select(query); Pattern pattern; pattern = patternDAO.findById(patternId); String content = "EventName: IdentityVerification\n"; if (pattern != null) { message = new Message(); message.setPatternId(pattern); Elements events = doc.select("ns1|eventData"); org.jsoup.nodes.Element event = events.get(0); Elements elements = event.getAllElements(); for (org.jsoup.nodes.Element element : elements) { content = content + element.tagName() + " : " + element.ownText() + "\n"; } message.setSubject("Identity Verification"); message.setSummary("default summary"); message.setContent(content); message.setMsgDate(new Date()); message.setMsgID(1); } else System.out.println("can't find patternID of the complex event:" + patternId); } catch (Exception e) { System.out.println(e.getMessage()); } return message; }
From source file:de.fzi.ALERT.actor.MessageObserver.NotificationObserver.JMSNotificationParser.java
public Message parseIssueRecommendation(Document doc) { Message message = null;// www . java 2 s.c o m try { Elements pID = doc.select("p|patternId"); String patternId = pID.get(0).text(); // String message = doc.select(query); Pattern pattern; pattern = patternDAO.findById(patternId); String content = "EventName: IssueRecommendation\n"; if (pattern != null) { message = new Message(); message.setPatternId(pattern); Elements events = doc.select("ns1|eventData"); org.jsoup.nodes.Element event = events.get(0); Elements elements = event.getAllElements(); for (org.jsoup.nodes.Element element : elements) { content = content + element.tagName() + " : " + element.ownText() + "\n"; } message.setSubject("Issue Recommendation"); message.setSummary("default summary"); message.setContent(content); message.setMsgDate(new Date()); message.setMsgID(1); } else System.out.println("can't find patternID of the complex event:" + patternId); } catch (Exception e) { System.out.println(e.getMessage()); } return message; }
From source file:com.glluch.profilesparser.ProfileHtmlReader.java
/** * Given a ICT profile in html file, extracts * all the competences in the profile and builds a list of * {@link com.glluch.profilesparser.ECFMap} from them. * @param filename The html file where the profile is stored. * @return A list with all the competences as {@link com.glluch.profilesparser.ECFMap}. * @throws IOException can't not read the html file. *//* w w w . j av a 2 s . c o m*/ public ArrayList<ECFMap> competences(String filename) throws IOException { init(filename); ArrayList<ECFMap> ecfm; int i = 0; Elements cs = doc.select("h4");//the first h4 is not a comptence ArrayList<String> comps = new ArrayList<>();//all comptences are here but not the levels for (Element c : cs) { if (i != 0) { comps.add(c.ownText()); } i++; } //for ecfm = foundLevels(comps, allTxt); return ecfm; }
From source file:com.liato.bankdroid.banking.banks.Hemkop.java
@Override public void update() throws BankException, LoginException, BankChoiceException { super.update(); if (username == null || password == null || username.length() == 0 || password.length() == 0) { throw new LoginException(res.getText(R.string.invalid_username_password).toString()); }//from w ww . jav a2 s . c om urlopen = login(); Document d = Jsoup.parse(response); Elements amounts = d.select(".bonusStatement .amount"); Elements names = d.select(".bonusStatement .label"); for (int i = 0; i < Math.min(amounts.size(), names.size()); i++) { Element amount = amounts.get(i); Element name = names.get(i); BigDecimal accountBalance = Helpers.parseBalance(amount.ownText()); Account account = new Account(name.ownText().replace(":", "").trim(), accountBalance, String.format("acc_%d", i)); if (i > 0) { account.setAliasfor("acc_0"); } accounts.add(account); balance = balance.add(accountBalance); } if (accounts.isEmpty()) { throw new BankException(res.getText(R.string.no_accounts_found).toString()); } Account account = accounts.get(0); try { response = urlopen.open("https://www.hemkop.se/Mina-sidor/Kontoutdrag/"); d = Jsoup.parse(response); Elements es = d.select(".transactions tbody tr"); ArrayList<Transaction> transactions = new ArrayList<Transaction>(); for (Element e : es) { Transaction t = new Transaction(e.child(1).ownText().trim(), e.child(0).ownText().trim(), Helpers.parseBalance(e.child(3).ownText())); if (!TextUtils.isEmpty(e.child(2).ownText())) { t.setCurrency(Helpers.parseCurrency(e.child(2).ownText().trim(), "SEK")); } transactions.add(t); } account.setTransactions(transactions); es = d.select(".currentBalance,.disposable"); int i = 0; for (Element e : es) { Account a = new Account(e.child(0).ownText().trim(), Helpers.parseBalance(e.child(1).ownText()), String.format("acc_cc_%d", i)); a.setAliasfor("acc_0"); accounts.add(a); i++; } } catch (ClientProtocolException e) { e.printStackTrace(); Log.e(TAG, e.getMessage() != null ? 
e.getMessage() : ""); } catch (IOException e) { e.printStackTrace(); Log.e(TAG, e.getMessage() != null ? e.getMessage() : ""); } super.updateComplete(); }
From source file:com.glluch.profilesparser.ProfileHtmlReader.java
/** * Read an parse a hmtl file which contains a ICT profile. Some parts are extracted * with Jsop and some others from a plain text. * @param filename The html file where the profile is stored. * @return An ICTProfile read from the html file. *///from w ww . jav a 2 s. c om @Override public ICTProfile reader(String filename) { ICTProfile res = new ICTProfile(); try { init(filename); Element ts = doc.select("h2").first(); res.setTitle(ts.ownText().trim()); //Get summary, the text Mission and KPI int i = 0; Elements txts = doc.select("h3 + p"); for (Element text : txts) { if (i == 0) { res.setSummary(text.ownText()); } if (i == 1) { res.setMission(new Mission(text.ownText())); } if (i == 2) { res.setKpi(text.ownText()); } i++; } //Get Mission Deliverables and tasks String acc = StringUtils.substringBetween(allTxt, "Accountable", "Responsible").trim(); String respon = StringUtils.substringBetween(allTxt, "Responsible", "Contributor").trim(); String contrib = StringUtils.substringBetween(allTxt, "Contributor", "Main task/s").trim(); String tks = StringUtils.substringBetween(allTxt, "Main task/s", "KPI area ").trim(); HashMap<Integer, String> uls = new HashMap<>(); i = 0; if (StringUtils.isNotEmpty(acc)) { uls.put(i++, "Accountable"); } if (StringUtils.isNotEmpty(respon)) { uls.put(i++, "Responsible"); } if (StringUtils.isNotEmpty(contrib)) { uls.put(i++, "Contributor"); } if (StringUtils.isNotEmpty(tks)) { uls.put(i++, "Main task/s"); } //TODO delete else //System.out.println(uls.toString()); Elements html_uls = doc.select("ul"); if (html_uls.size() != uls.size()) { System.out.println("\nERROR in " + res.getTitle() + ", num ul=" + html_uls.size() + ", num_parts=" + uls.size()); } i = 0; for (Element ul : html_uls) { String target = uls.get(i); res = place(res, target, ul); i++; } //res.setTasks(tasks); //Get Competences i = 0; Elements cs = doc.select("h4");//the first h4 is not a competence ArrayList<String> comps = new ArrayList<>();//all comptences are here but not 
the levels for (Element c : cs) { if (i != 0) { comps.add(c.ownText()); } i++; } //for res.setEcfs(foundLevels(comps, allTxt)); //first p after first h3 h3:eq(0) + p } catch (IOException ex) { Logger.getLogger(ProfileHtmlReader.class.getName()).log(Level.SEVERE, null, ex); } return res; }
From source file:me.vertretungsplan.parser.DaVinciParser.java
/**
 * Parses a single DaVinci page and adds its content to the schedule.
 *
 * The page title is either a date or a class name. When no date can be found in the
 * title or in the element following it, the page is treated as covering multiple days:
 * no day object is created, page messages are skipped, and the last-change time is
 * stored on the schedule instead of on the day.
 *
 * @param doc      the page root element
 * @param schedule the schedule that receives the parsed day and/or last-change time
 * @throws IOException declared for callers; presumably raised by the table parser — confirm
 */
@NotNull
void parsePage(Element doc, SubstitutionSchedule schedule) throws IOException {
    SubstitutionScheduleDay day = new SubstitutionScheduleDay();

    Element titleElem;
    if (doc.select("h1.list-table-caption").size() > 0) {
        titleElem = doc.select("h1.list-table-caption").first();
    } else {
        // DaVinci 5
        titleElem = doc.select("h2").first();
    }
    String title = titleElem.text();

    String klasse = null;
    // title can either be date or class
    // NOTE(review): the dot before \d{4} is unescaped and matches any character — confirm intent
    Pattern datePattern = Pattern.compile("\\d+\\.\\d+.\\d{4}");
    Matcher dateMatcher = datePattern.matcher(title);
    if (dateMatcher.find()) {
        day.setDateString(dateMatcher.group());
        day.setDate(ParserUtils.parseDate(dateMatcher.group()));
    } else {
        klasse = title;
        // nextElementSibling() is null when the title is the last child
        Element nextElem = titleElem.nextElementSibling();
        String nextText = nextElem != null ? nextElem.text() : "";
        if (nextText.matches("\\w+ \\d+\\.\\d+.\\d{4}")) {
            day.setDateString(nextText);
            day.setDate(ParserUtils.parseDate(nextText));
        } else {
            // could not find date, must be multiple days
            day = null;
        }
    }

    // Messages can only be attached to a day; both loops are skipped for multi-day
    // pages (previously the .callout loop dereferenced a null day).
    if (day != null) {
        for (Element p : doc.select(".row:has(h1.list-table-caption) p")) {
            for (TextNode node : p.textNodes()) {
                String text = node.text().trim();
                if (!text.isEmpty()) {
                    day.addMessage(text);
                }
            }
        }
        for (Element message : doc.select(".callout")) {
            for (TextNode node : message.textNodes()) {
                String text = node.text().trim();
                if (!text.isEmpty()) {
                    day.addMessage(text);
                }
            }
        }
    }

    Element lastChangeElem = doc.select(".row.copyright div").first();
    if (lastChangeElem == null) {
        // DaVinci 5
        lastChangeElem = doc.select("h1").first();
    }
    // With neither element present there is no last-change text to match against.
    String lastChange = lastChangeElem != null ? lastChangeElem.ownText() : "";

    Pattern pattern = Pattern.compile("(\\d{2}-\\d{2}-\\d{4} \\d{2}:\\d{2}) \\|");
    Matcher matcher = pattern.matcher(lastChange);
    if (matcher.find()) {
        LocalDateTime lastChangeTime = DateTimeFormat.forPattern("dd-MM-yyyy HH:mm")
                .parseLocalDateTime(matcher.group(1));
        if (day != null) {
            day.setLastChange(lastChangeTime);
        } else {
            schedule.setLastChange(lastChangeTime);
        }
    } else {
        Pattern pattern2 = Pattern.compile("(\\d{2}.\\d{2}.\\d{4} \\| \\d+:\\d{2})");
        Matcher matcher2 = pattern2.matcher(lastChange);
        if (matcher2.find()) {
            LocalDateTime lastChangeTime = DateTimeFormat.forPattern("dd.MM.yyyy | HH:mm")
                    .parseLocalDateTime(matcher2.group(1));
            if (day != null) {
                day.setLastChange(lastChangeTime);
            } else {
                schedule.setLastChange(lastChangeTime);
            }
        }
    }

    if (doc.select(".list-table").size() > 0 || !doc.select(".callout").text().contains("Es liegen keine")) {
        Element table = doc.select(".list-table, table").first();
        parseDaVinciTable(table, schedule, klasse, day, colorProvider);
    }

    if (day != null) {
        schedule.addDay(day);
    }
}
From source file:crawler.HackerEarthCrawler.java
@Override public void crawl() { int flag = 0; //set of urls which should be crawled TreeSet<String> linksset = new TreeSet<String>(); TreeSet<String> tempset = new TreeSet<String>(); TreeSet<String> tutorialset = new TreeSet<String>(); //final set of problem urls TreeSet<String> problemset = new TreeSet<String>(); //visited for maintaing status of if url is already crawled or not TreeMap<String, Integer> visited = new TreeMap<String, Integer>(); //add base url linksset.add(baseUrl);/*w w w . ja v a2s . c om*/ //mark base url as not crawled visited.put(baseUrl, 0); try { while (true) { flag = 0; tempset.clear(); for (String str : linksset) { //check if url is already crawled or not and it has valid domain name if ((visited.get(str) == 0) && (str.startsWith("https://www.hackerearth.com/"))) { System.out.println("crawling " + str); //retriving response of current url as document Document doc = Jsoup.connect(str).timeout(0).userAgent( "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0") .referrer("http://www.google.com").ignoreHttpErrors(true).get(); //retriving all urls from current page Elements links = doc.select("a[href]"); //mark url as crawled visited.put(str, 1); //mark flag as url is crawled flag = 1; //retrive all urls for (Element link : links) { if (link.absUrl("href").endsWith("/tutorial/")) { tutorialset.add(link.absUrl("href")); } //check if url is problem url then add it in problemurlset if (link.absUrl("href").startsWith("https://www.hackerearth.com/") && isProblemUrl(link.absUrl("href"))) { problemset.add(link.absUrl("href")); } //check if url has valid domain and it has problem urls or not if (link.absUrl("href").contains(("https://www.hackerearth.com/")) && isCrawlable(link.absUrl("href"))) { //if link is not visited then mark it as uncrawled if (!visited.containsKey(link.absUrl("href"))) { visited.put(link.absUrl("href"), 0); } //add it in tempsetorary set tempset.add(link.absUrl("href")); //System.out.println("\n base: 
"+str+" ::: link : " + link.absUrl("href")); } } } } //if nothing is left to crawl break the loop if (flag == 0) { break; } //add all retrieved links to linksset linksset.addAll(tempset); } System.out.println("\n\ntotal problem urls " + problemset.size()); int i = 0; for (String str : problemset) { System.out.println("link " + i + " : " + str); i++; } } catch (IOException ex) { Logger.getLogger(HackerEarthCrawler.class.getName()).log(Level.SEVERE, null, ex); } //scrap and store into database //for every problem url scrap problem page for (String problemUrl : problemset) { System.out.println("problemUrl :" + problemUrl); try { //create problem class to store in database Problem problem = new Problem(); String problemSIOC = "", problemIOC = ""; String problemTitle = "", problemStatement = "", problemInput = "", problemOutput = "", problemConstraints = ""; String sampleInput = "", sampleOutput = ""; String problemExplanation = ""; //set default timelimit to 1 second double problemTimeLimit = 1.0; ArrayList<String> tags = new ArrayList<String>(); //get response for given problem url Response response = Jsoup.connect(problemUrl).execute(); Document doc = response.parse(); //retrieve problem title from page Element elementTitle = doc.getElementsByTag("title").first(); StringTokenizer stTitle = new StringTokenizer(elementTitle.text(), "|"); problemTitle = stTitle.nextToken().trim(); Element content = doc.getElementsByClass("starwars-lab").first(); problemSIOC = content.text(); Elements e = content.children(); //to find problem statement String breakloop[] = { "input", "input:", "input :", "input format:", "input format :", "input format", "Input and output", "constraints :", "constraints:", "constraints", "$$Input :$$" }; flag = 0; for (Element p : e) { String tempStatement = ""; for (Element pp : p.getAllElements()) { for (String strbreak : breakloop) { if (StringUtils.equalsIgnoreCase(pp.ownText(), strbreak)) { //System.out.println("strbreak :"+strbreak); tempStatement 
= p.text().substring(0, p.text().toLowerCase().indexOf(strbreak.toLowerCase())); // System.out.println("temp "+tempStatement); flag = 1; break; } } } if (flag == 1) { problemStatement += tempStatement; //remove extra space at end if (tempStatement.length() == 0) { problemStatement = problemStatement.substring(0, problemStatement.length() - 1); } break; } problemStatement += p.text() + " "; } System.out.println("problemSIOC :" + problemSIOC); System.out.println("problemStatement :" + problemStatement); if (problemStatement.length() <= problemSIOC.length()) { //remove problem statement from whole text and remove extra spaces at the beginning and the end problemIOC = problemSIOC.substring(problemStatement.length()).trim(); } else { problemIOC = ""; } System.out.println("problemIOC :" + problemIOC); //keywords for identifying input String decideInput[] = { "Input format :", "Input format:", "Input format", "inputformat:", "inputformat :", "inputformat", "input and output", "input :", "input:", "input" }; //keywords for identifying output String decideOutput[] = { "output format :", "output format:", "Output format", "outputformat:", "outputformat :", "outputformat", "output :", "output:", "output" }; //keywords for identifying constraint String decideConstraint[] = { "constraints:", "constraints :", "constraints", "Constraints :", "constraint:", "constraint :", "constraint", "Contraints :" }; int posin = 0, posoutput = 0, poscon = 0, idxin, idxout, idxcon, flaginput = 0, flagoutput = 0, flagcon = 0, inlen = 0, outlen = 0, conlen = 0; //find inputformat position,length of keyword for (idxin = 0; idxin < decideInput.length; idxin++) { if (StringUtils.containsIgnoreCase(problemIOC, decideInput[idxin])) { posin = problemIOC.toLowerCase().indexOf(decideInput[idxin].toLowerCase()); flaginput = 1; inlen = decideInput[idxin].length(); //decide it is keyowrd for actucal input or it is "sample input" if (StringUtils.containsIgnoreCase(problemIOC, "sample input")) { if (posin > 
problemIOC.toLowerCase().indexOf("sample input")) { flaginput = 0; inlen = 0; } else { break; } } else { break; } } } //find outputformat position,length of keyword for (idxout = 0; idxout < decideOutput.length; idxout++) { if (StringUtils.containsIgnoreCase(problemIOC, decideOutput[idxout])) { posoutput = problemIOC.toLowerCase().indexOf(decideOutput[idxout].toLowerCase()); flagoutput = 1; outlen = decideOutput[idxout].length(); break; } } //find constraint position,length of keyword for (idxcon = 0; idxcon < decideConstraint.length; idxcon++) { if (StringUtils.containsIgnoreCase(problemIOC, decideConstraint[idxcon])) { poscon = problemIOC.toLowerCase().indexOf(decideConstraint[idxcon].toLowerCase()); flagcon = 1; conlen = decideConstraint[idxcon].length(); break; } } System.out.println("input " + flaginput + " " + inlen + " " + posin); System.out.println("output " + flagoutput + " " + outlen + " " + posoutput); System.out.println("constraint " + flagcon + " " + conlen + " " + poscon); //retrieve problem input and output if present in problem page //if input format is present if (flaginput == 1) { //if input keyword is "input and output" and contraint is present in problem page if (idxin == 6 && flagcon == 1) { problemInput = problemIOC.substring(inlen, poscon); } //if input keyword is "input and output" and contraint is not present in problem page else if (idxin == 6 && flagcon == 0) { problemInput = problemIOC.substring(inlen); } //if output format and constraint is present else if (flagoutput == 1 && flagcon == 1) { //if constraint is present before input format if (poscon < posin) { problemInput = problemIOC.substring(posin + inlen, posoutput); problemOutput = problemIOC.substring(posoutput + outlen); } //if constraint is present before sample else if (poscon < posoutput) { problemInput = problemIOC.substring(inlen, poscon); problemOutput = problemIOC.substring(posoutput + outlen); } else { problemInput = problemIOC.substring(inlen, posoutput); problemOutput = 
problemIOC.substring(posoutput + outlen, poscon); } } //if constraint is not present else if (flagoutput == 1 && flagcon == 0) { problemInput = problemIOC.substring(inlen, posoutput); problemOutput = problemIOC.substring(posoutput + outlen); } else if (flagoutput == 0 && flagcon == 1) { if (poscon < posin) { problemInput = problemIOC.substring(posin + inlen); } else { problemInput = problemIOC.substring(poscon + conlen, posin); } problemOutput = ""; } else { problemInput = problemIOC.substring(inlen); problemOutput = ""; } } //if input format and output format is not present else { problemInput = ""; problemOutput = ""; } //if constraint is present if (flagcon == 1) { //if constraint is present before input format if (poscon < posin) { problemConstraints = problemIOC.substring(0, posin); } //if constraint is present before output format else if (poscon < posoutput) { problemConstraints = problemIOC.substring(poscon + conlen, posoutput); } else { problemConstraints = problemIOC.substring(poscon + conlen); } } System.out.println("problemInput :" + problemInput); System.out.println("problemOutput :" + problemOutput); System.out.println("problemConstraints :" + problemConstraints); //retrieve problem tags from problem page Element elementtag = doc.getElementsByClass("problem-tags").first().child(1); StringTokenizer st = new StringTokenizer(elementtag.text(), ","); while (st.hasMoreTokens()) { tags.add(st.nextToken().trim()); } //retrieve sample input sample output if present Element elementSIO = doc.getElementsByClass("input-output-container").first(); //if sample input output is present if (elementSIO != null) { //find position of sample output int soutpos = elementSIO.text().indexOf("SAMPLE OUTPUT"); sampleInput = elementSIO.text().substring(12, soutpos); sampleOutput = elementSIO.text().substring(soutpos + 13); System.out.println("Sample input :\n" + sampleInput + "\n\n\n"); System.out.println("Sample Output :\n" + sampleOutput); } else { sampleInput = ""; 
sampleOutput = ""; } //retrieve problem explanation from problem page if present Element elementExplanation = doc.getElementsByClass("standard-margin").first().child(0); if (elementExplanation.text().toLowerCase().contains("explanation")) { problemExplanation = elementExplanation.nextElementSibling().text(); } System.out.println("Explanation :" + problemExplanation); //retrieve timelimit Element elementTL = doc.getElementsByClass("problem-guidelines").first().child(0).child(1); StringTokenizer stTL = new StringTokenizer(elementTL.ownText(), " "); problemTimeLimit = Double.parseDouble(stTL.nextToken()); //System.out.println("problemTimeLimit :"+problemTimeLimit); //set all retrieved information to problem class problem.setProblemUrl(problemUrl); if (problemTitle.length() == 0) { problemTitle = null; } if (problemStatement.length() == 0) { problemStatement = null; } if (problemInput.length() == 0) { problemInput = null; } if (problemOutput.length() == 0) { problemOutput = null; } if (problemExplanation.length() == 0) { problemExplanation = null; } if (problemConstraints.length() == 0) { problemConstraints = null; } problem.setTitle(problemTitle); problem.setProblemUrl(problemUrl); problem.setProblemStatement(problemStatement); problem.setInputFormat(problemInput); problem.setOutputFormat(problemOutput); problem.setTimeLimit(problemTimeLimit); problem.setExplanation(problemExplanation); problem.setConstraints(problemConstraints); //set sample input output to problem class SampleInputOutput sampleInputOutput = new SampleInputOutput(problem, sampleInput, sampleOutput); problem.getSampleInputOutputs().add(sampleInputOutput); //set platform as hackerearth problem.setPlatform(Platform.HackerEarth); for (String strtag : tags) { problem.getTags().add(strtag); } //store in database Session session = null; Transaction transaction = null; try { //start session session = HibernateUtil.getSessionFactory().openSession(); transaction = session.beginTransaction(); //check if problem 
is already stored in database String hql = "FROM Problem p where p.problemUrl = :problem_url"; Problem oldProblem = (Problem) session.createQuery(hql).setString("problem_url", problemUrl) .uniqueResult(); String task; //if problem is present in database if (oldProblem != null) { //update the old problem task = "updated"; //retrieve id of old problem problem.setId(oldProblem.getId()); session.delete(oldProblem); session.flush(); session.save(problem); } else { task = "saved"; session.save(problem); } transaction.commit(); //log the info to console Logger.getLogger(CodeForcesCrawler.class.getName()).log(Level.INFO, "{0} {1}", new Object[] { task, problem.getProblemUrl() }); } catch (HibernateException ee) { if (transaction != null) { transaction.rollback(); } Logger.getLogger(CodeForcesCrawler.class.getName()).log(Level.SEVERE, "Cannot Insert/Update problem into databse: " + problemUrl, e); } finally { //close the session if (session != null) { session.close(); } } } catch (Exception ee) { System.out.println(ee.toString()); } } System.out.println("\n\n\n\ntutorial urls\n\n"); try { for (String tutorialurl : tutorialset) { //System.out.println(tutorialurl+"\n\n"); Response tutorialres = Jsoup.connect(tutorialurl).execute(); Document doc = tutorialres.parse(); Tutorial tutorial = new Tutorial(); tutorial.setContent(doc.getElementsByClass("tutorial").first().text()); tutorial.setName(baseUrl); tutorialurl = tutorialurl.substring(0, tutorialurl.length() - 10); StringTokenizer tutorialtok = new StringTokenizer(tutorialurl, "/"); String tempstr = ""; while (tutorialtok.hasMoreTokens()) { tempstr = tutorialtok.nextToken(); } Session session = null; Transaction transaction = null; try { //start session session = HibernateUtil.getSessionFactory().openSession(); transaction = session.beginTransaction(); //check if problem is already stored in database String hql = "FROM Tutorial p where p.name = :name"; Tutorial oldProblem = (Tutorial) 
session.createQuery(hql).setString("name", tempstr) .uniqueResult(); String task; //if problem is present in database if (oldProblem != null) { //update the old problem task = "updated"; //retrieve id of old problem tutorial.setName(oldProblem.getName()); session.delete(oldProblem); session.flush(); session.save(tutorial); } else { task = "saved"; tutorial.setName(tempstr); session.save(tutorial); } transaction.commit(); //log the info to console Logger.getLogger(CodeForcesCrawler.class.getName()).log(Level.INFO, "{0} {1}", new Object[] { task, tutorial.getName() }); } catch (HibernateException ee) { if (transaction != null) { transaction.rollback(); } Logger.getLogger(CodeForcesCrawler.class.getName()).log(Level.SEVERE, "Cannot Insert/Update problem into databse: " + tempstr, ee); } finally { //close the session if (session != null) { session.close(); } } } } catch (Exception e) { System.out.println(e.getMessage()); } }