Example usage for org.jsoup.nodes Element ownText

List of usage examples for org.jsoup.nodes Element ownText

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.ownText().

Prototype

public String ownText() 

Source Link

Document

Gets the text owned by this element only; does not get the combined text of all children.

Usage

From source file:com.glluch.profilesparser.ProfileHtmlReader.java

/**
 * Collects the own text of every {@code li} element selected under the given element.
 *
 * @param list the element whose {@code li} descendants are read
 * @return the own text of each selected {@code li}, in selection order
 */
private ArrayList<String> ul2array(Element list) {
    ArrayList<String> items = new ArrayList<>();
    for (Element item : list.select("li")) {
        items.add(item.ownText());
    }
    return items;
}

From source file:de.fzi.ALERT.actor.MessageObserver.NotificationObserver.JMSNotificationParser.java

/**
 * Dumps the first {@code ns1|eventData} element of the document as text and hands it
 * to a new {@code RssFeedGenerator}.
 *
 * <p>Every element returned by {@code getAllElements()} on the event data contributes
 * one {@code "<tagName> : <ownText>"} line. Any exception raised while doing so
 * (for example when no {@code ns1|eventData} element is selected) is caught and its
 * message is printed to standard output.
 *
 * @param doc the parsed notification document
 */
private void parseIssueAlertIusse(Document doc) {
    RssFeedGenerator newRssFeedGenerator = new RssFeedGenerator();
    try {
        // StringBuilder avoids re-copying the whole text for every element.
        StringBuilder content = new StringBuilder("EventName: AlertIusse\n");
        Elements events = doc.select("ns1|eventData");
        org.jsoup.nodes.Element event = events.get(0);
        for (org.jsoup.nodes.Element element : event.getAllElements()) {
            content.append(element.tagName()).append(" : ").append(element.ownText()).append("\n");
        }
        System.out.println("content:" + content);
        newRssFeedGenerator.RssFeedXml("title", "Link", content.toString());
        System.out.println("!!!");
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }

}

From source file:de.fzi.ALERT.actor.MessageObserver.NotificationObserver.JMSNotificationParser.java

/**
 * Builds an "Identity Recommendation" message from a notification document.
 *
 * <p>The pattern id is read from the first {@code p|patternId} element and looked up
 * through {@code patternDAO}. When a pattern is found, the message content lists one
 * {@code "<tagName> : <ownText>"} line per element of the first {@code ns1|eventData}
 * element.
 *
 * @param doc the parsed notification document
 * @return the fully populated message, or {@code null} when the pattern is unknown or
 *         any exception occurs while parsing (the exception message is printed)
 */
private Message parseIdentityRecommendation(Document doc) {
    Message message = null;
    try {
        String patternId = doc.select("p|patternId").get(0).text();
        Pattern pattern = patternDAO.findById(patternId);

        if (pattern != null) {
            StringBuilder content = new StringBuilder("EventName: IdentityRecommendation\n");
            org.jsoup.nodes.Element event = doc.select("ns1|eventData").get(0);
            for (org.jsoup.nodes.Element element : event.getAllElements()) {
                content.append(element.tagName()).append(" : ").append(element.ownText()).append("\n");
            }

            // Populate a local instance and publish it only once complete, so a failure
            // part-way through never returns a half-initialised message.
            Message built = new Message();
            built.setPatternId(pattern);
            built.setSubject("Identity Recommendation");
            built.setSummary("default summary");
            built.setContent(content.toString());
            built.setMsgDate(new Date());
            built.setMsgID(1);
            message = built;
        } else {
            System.out.println("can't find patternID of the complex event:" + patternId);
        }

    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
    return message;
}

From source file:de.fzi.ALERT.actor.MessageObserver.NotificationObserver.JMSNotificationParser.java

/**
 * Builds an "Identity Verification" message from a notification document.
 *
 * <p>The pattern id is read from the first {@code p|patternId} element and looked up
 * through {@code patternDAO}. When a pattern is found, the message content lists one
 * {@code "<tagName> : <ownText>"} line per element of the first {@code ns1|eventData}
 * element.
 *
 * @param doc the parsed notification document
 * @return the fully populated message, or {@code null} when the pattern is unknown or
 *         any exception occurs while parsing (the exception message is printed)
 */
public Message parseIdentityVerification(Document doc) {
    Message message = null;
    try {
        String patternId = doc.select("p|patternId").get(0).text();
        Pattern pattern = patternDAO.findById(patternId);

        if (pattern != null) {
            StringBuilder content = new StringBuilder("EventName: IdentityVerification\n");
            org.jsoup.nodes.Element event = doc.select("ns1|eventData").get(0);
            for (org.jsoup.nodes.Element element : event.getAllElements()) {
                content.append(element.tagName()).append(" : ").append(element.ownText()).append("\n");
            }

            // Populate a local instance and publish it only once complete, so a failure
            // part-way through never returns a half-initialised message.
            Message built = new Message();
            built.setPatternId(pattern);
            built.setSubject("Identity Verification");
            built.setSummary("default summary");
            built.setContent(content.toString());
            built.setMsgDate(new Date());
            built.setMsgID(1);
            message = built;
        } else {
            System.out.println("can't find patternID of the complex event:" + patternId);
        }

    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
    return message;
}

From source file:de.fzi.ALERT.actor.MessageObserver.NotificationObserver.JMSNotificationParser.java

/**
 * Builds an "Issue Recommendation" message from a notification document.
 *
 * <p>The pattern id is read from the first {@code p|patternId} element and looked up
 * through {@code patternDAO}. When a pattern is found, the message content lists one
 * {@code "<tagName> : <ownText>"} line per element of the first {@code ns1|eventData}
 * element.
 *
 * @param doc the parsed notification document
 * @return the fully populated message, or {@code null} when the pattern is unknown or
 *         any exception occurs while parsing (the exception message is printed)
 */
public Message parseIssueRecommendation(Document doc) {
    Message message = null;
    try {
        String patternId = doc.select("p|patternId").get(0).text();
        Pattern pattern = patternDAO.findById(patternId);

        if (pattern != null) {
            StringBuilder content = new StringBuilder("EventName: IssueRecommendation\n");
            org.jsoup.nodes.Element event = doc.select("ns1|eventData").get(0);
            for (org.jsoup.nodes.Element element : event.getAllElements()) {
                content.append(element.tagName()).append(" : ").append(element.ownText()).append("\n");
            }

            // Populate a local instance and publish it only once complete, so a failure
            // part-way through never returns a half-initialised message.
            Message built = new Message();
            built.setPatternId(pattern);
            built.setSubject("Issue Recommendation");
            built.setSummary("default summary");
            built.setContent(content.toString());
            built.setMsgDate(new Date());
            built.setMsgID(1);
            message = built;
        } else {
            System.out.println("can't find patternID of the complex event:" + patternId);
        }

    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
    return message;
}

From source file:com.glluch.profilesparser.ProfileHtmlReader.java

/**
 * Given an ICT profile stored as an HTML file, extracts all the competences
 * in the profile and builds a list of
 * {@link com.glluch.profilesparser.ECFMap} from them.
 *
 * @param filename The HTML file where the profile is stored.
 * @return A list with all the competences as {@link com.glluch.profilesparser.ECFMap}.
 * @throws IOException if the HTML file cannot be read.
 */
public ArrayList<ECFMap> competences(String filename) throws IOException {
    init(filename);

    // The first h4 is not a competence, so collection starts at the second heading.
    // Only the competence names are gathered here, not their levels.
    Elements headings = doc.select("h4");
    ArrayList<String> comps = new ArrayList<>();
    for (int idx = 1; idx < headings.size(); idx++) {
        comps.add(headings.get(idx).ownText());
    }

    return foundLevels(comps, allTxt);
}

From source file:com.liato.bankdroid.banking.banks.Hemkop.java

/**
 * Logs in, reads the bonus statement entries into accounts, then loads the
 * transactions and the current-balance/disposable entries from the statement page.
 *
 * <p>Every account after the first bonus entry, and every entry from the statement
 * page, is registered as an alias for {@code acc_0}. I/O failures while fetching the
 * statement page are logged and otherwise ignored.
 *
 * @throws LoginException if the username or password is null or empty
 * @throws BankException if no bonus statement entries were found
 */
@Override
public void update() throws BankException, LoginException, BankChoiceException {
    super.update();
    boolean usernameMissing = username == null || username.length() == 0;
    boolean passwordMissing = password == null || password.length() == 0;
    if (usernameMissing || passwordMissing) {
        throw new LoginException(res.getText(R.string.invalid_username_password).toString());
    }

    urlopen = login();

    Document overview = Jsoup.parse(response);
    Elements amounts = overview.select(".bonusStatement .amount");
    Elements names = overview.select(".bonusStatement .label");
    // Amounts and labels are paired by position; unmatched trailing entries are dropped.
    int pairCount = Math.min(amounts.size(), names.size());
    for (int idx = 0; idx < pairCount; idx++) {
        BigDecimal accountBalance = Helpers.parseBalance(amounts.get(idx).ownText());
        String accountName = names.get(idx).ownText().replace(":", "").trim();
        Account bonusAccount = new Account(accountName, accountBalance, String.format("acc_%d", idx));
        if (idx > 0) {
            bonusAccount.setAliasfor("acc_0");
        }
        accounts.add(bonusAccount);
        balance = balance.add(accountBalance);
    }

    if (accounts.isEmpty()) {
        throw new BankException(res.getText(R.string.no_accounts_found).toString());
    }

    Account account = accounts.get(0);
    try {
        response = urlopen.open("https://www.hemkop.se/Mina-sidor/Kontoutdrag/");
        Document statement = Jsoup.parse(response);

        ArrayList<Transaction> transactions = new ArrayList<>();
        for (Element row : statement.select(".transactions tbody tr")) {
            String first = row.child(1).ownText().trim();
            String second = row.child(0).ownText().trim();
            Transaction t = new Transaction(first, second, Helpers.parseBalance(row.child(3).ownText()));
            String currencyText = row.child(2).ownText();
            if (!TextUtils.isEmpty(currencyText)) {
                t.setCurrency(Helpers.parseCurrency(currencyText.trim(), "SEK"));
            }
            transactions.add(t);
        }
        account.setTransactions(transactions);

        int extraIndex = 0;
        for (Element entry : statement.select(".currentBalance,.disposable")) {
            String entryName = entry.child(0).ownText().trim();
            Account extra = new Account(entryName, Helpers.parseBalance(entry.child(1).ownText()),
                    String.format("acc_cc_%d", extraIndex));
            extra.setAliasfor("acc_0");
            accounts.add(extra);
            extraIndex++;
        }

    } catch (ClientProtocolException e) {
        e.printStackTrace();
        Log.e(TAG, e.getMessage() != null ? e.getMessage() : "");
    } catch (IOException e) {
        e.printStackTrace();
        Log.e(TAG, e.getMessage() != null ? e.getMessage() : "");
    }

    super.updateComplete();
}

From source file:com.glluch.profilesparser.ProfileHtmlReader.java

/**
 * Reads and parses an HTML file which contains an ICT profile. Some parts are
 * extracted with jsoup selectors and some others from the plain text in {@code allTxt}.
 *
 * <p>If the file cannot be read, the {@link IOException} is logged and the profile
 * built so far is returned.
 *
 * @param filename The HTML file where the profile is stored.
 * @return An ICTProfile read from the HTML file.
 */
@Override
public ICTProfile reader(String filename) {
    ICTProfile res = new ICTProfile();

    try {
        init(filename);

        Element ts = doc.select("h2").first();
        res.setTitle(ts.ownText().trim());

        // The first three paragraphs that directly follow an h3 hold, in order,
        // the summary, the mission and the KPI.
        int i = 0;
        Elements txts = doc.select("h3 + p");
        for (Element text : txts) {
            switch (i) {
            case 0:
                res.setSummary(text.ownText());
                break;
            case 1:
                res.setMission(new Mission(text.ownText()));
                break;
            case 2:
                res.setKpi(text.ownText());
                break;
            default:
                break;
            }
            i++;
        }

        // Get mission deliverables and tasks.
        // substringBetween returns null when a marker is missing; StringUtils.trim is
        // null-safe, so a missing section is treated like an empty one instead of
        // failing with a NullPointerException.
        String acc = StringUtils.trim(StringUtils.substringBetween(allTxt, "Accountable", "Responsible"));
        String respon = StringUtils.trim(StringUtils.substringBetween(allTxt, "Responsible", "Contributor"));
        String contrib = StringUtils.trim(StringUtils.substringBetween(allTxt, "Contributor", "Main task/s"));
        String tks = StringUtils.trim(StringUtils.substringBetween(allTxt, "Main task/s", "KPI area "));

        // Maps the position of each non-empty section to its name, so that the n-th
        // ul in the document can be matched to the n-th non-empty section.
        HashMap<Integer, String> uls = new HashMap<>();
        i = 0;
        if (StringUtils.isNotEmpty(acc)) {
            uls.put(i++, "Accountable");
        }
        if (StringUtils.isNotEmpty(respon)) {
            uls.put(i++, "Responsible");
        }
        if (StringUtils.isNotEmpty(contrib)) {
            uls.put(i++, "Contributor");
        }
        if (StringUtils.isNotEmpty(tks)) {
            uls.put(i++, "Main task/s");
        }

        Elements html_uls = doc.select("ul");
        if (html_uls.size() != uls.size()) {

            System.out.println("\nERROR in " + res.getTitle() + ", num ul=" + html_uls.size() + ", num_parts="
                    + uls.size());
        }
        i = 0;
        for (Element ul : html_uls) {
            String target = uls.get(i);
            res = place(res, target, ul);
            i++;
        }

        // Get competences: the first h4 is not a competence, so it is skipped.
        // Only the competence names are collected here, not their levels.
        i = 0;
        Elements cs = doc.select("h4");

        ArrayList<String> comps = new ArrayList<>();
        for (Element c : cs) {
            if (i != 0) {
                comps.add(c.ownText());
            }
            i++;
        }

        res.setEcfs(foundLevels(comps, allTxt));

    } catch (IOException ex) {
        Logger.getLogger(ProfileHtmlReader.class.getName()).log(Level.SEVERE, null, ex);
    }
    return res;
}

From source file:me.vertretungsplan.parser.DaVinciParser.java

/**
 * Parses one DaVinci page into the given schedule.
 *
 * <p>The page title is either a date or a class name. When it is a class name and no
 * date follows it, the page is treated as covering multiple days: no
 * {@code SubstitutionScheduleDay} is created, page messages are skipped and the
 * last-change time is stored on the schedule instead of on a day.
 *
 * @param doc the page to parse
 * @param schedule the schedule that receives the parsed day, table and last-change time
 * @throws IOException declared by the signature; presumably raised by
 *         {@code parseDaVinciTable} — confirm against that method
 */
@NotNull
void parsePage(Element doc, SubstitutionSchedule schedule) throws IOException {
    SubstitutionScheduleDay day = new SubstitutionScheduleDay();

    Element titleElem;
    if (doc.select("h1.list-table-caption").size() > 0) {
        titleElem = doc.select("h1.list-table-caption").first();
    } else {
        // DaVinci 5
        titleElem = doc.select("h2").first();
    }
    String title = titleElem.text();
    String klasse = null;
    // title can either be date or class
    Pattern datePattern = Pattern.compile("\\d+\\.\\d+.\\d{4}");
    Matcher dateMatcher = datePattern.matcher(title);
    if (dateMatcher.find()) {
        day.setDateString(dateMatcher.group());
        day.setDate(ParserUtils.parseDate(dateMatcher.group()));
    } else {
        klasse = title;
        String nextText = titleElem.nextElementSibling().text();
        if (nextText.matches("\\w+ \\d+\\.\\d+.\\d{4}")) {
            day.setDateString(nextText);
            day.setDate(ParserUtils.parseDate(nextText));
        } else {
            // could not find date, must be multiple days
            day = null;
        }
    }

    for (Element p : doc.select(".row:has(h1.list-table-caption) p")) {
        for (TextNode node : p.textNodes()) {
            String messageText = node.text().trim();
            if (!messageText.isEmpty() && day != null) {
                day.addMessage(messageText);
            }
        }
    }
    for (Element message : doc.select(".callout")) {
        for (TextNode node : message.textNodes()) {
            String messageText = node.text().trim();
            // day is null for multi-day pages; without this guard a callout on such
            // a page caused a NullPointerException.
            if (!messageText.isEmpty() && day != null) {
                day.addMessage(messageText);
            }
        }
    }

    Element lastChangeElem = doc.select(".row.copyright div").first();
    if (lastChangeElem == null) {
        // DaVinci 5
        lastChangeElem = doc.select("h1").first();
    }
    String lastChange = lastChangeElem.ownText();

    // Two timestamp layouts are recognised; the first one that matches wins.
    LocalDateTime lastChangeTime = null;
    Pattern pattern = Pattern.compile("(\\d{2}-\\d{2}-\\d{4} \\d{2}:\\d{2}) \\|");
    Matcher matcher = pattern.matcher(lastChange);
    if (matcher.find()) {
        lastChangeTime = DateTimeFormat.forPattern("dd-MM-yyyy HH:mm")
                .parseLocalDateTime(matcher.group(1));
    } else {
        Pattern pattern2 = Pattern.compile("(\\d{2}.\\d{2}.\\d{4} \\| \\d+:\\d{2})");
        Matcher matcher2 = pattern2.matcher(lastChange);
        if (matcher2.find()) {
            lastChangeTime = DateTimeFormat.forPattern("dd.MM.yyyy | HH:mm")
                    .parseLocalDateTime(matcher2.group(1));
        }
    }
    if (lastChangeTime != null) {
        if (day != null) {
            day.setLastChange(lastChangeTime);
        } else {
            schedule.setLastChange(lastChangeTime);
        }
    }

    if (doc.select(".list-table").size() > 0 || !doc.select(".callout").text().contains("Es liegen keine")) {
        Element table = doc.select(".list-table, table").first();
        parseDaVinciTable(table, schedule, klasse, day, colorProvider);
    }

    if (day != null) {
        schedule.addDay(day);
    }
}

From source file:crawler.HackerEarthCrawler.java

@Override
public void crawl() {

    int flag = 0;

    //set of urls which should be crawled
    TreeSet<String> linksset = new TreeSet<String>();
    TreeSet<String> tempset = new TreeSet<String>();
    TreeSet<String> tutorialset = new TreeSet<String>();
    //final set of problem urls
    TreeSet<String> problemset = new TreeSet<String>();
    //visited for maintaing status of if url is already crawled or not
    TreeMap<String, Integer> visited = new TreeMap<String, Integer>();

    //add base url
    linksset.add(baseUrl);/*w w  w . ja v a2s . c om*/
    //mark base url as not crawled
    visited.put(baseUrl, 0);

    try {
        while (true) {
            flag = 0;
            tempset.clear();

            for (String str : linksset) {
                //check if url is already crawled or not and it has valid domain name
                if ((visited.get(str) == 0) && (str.startsWith("https://www.hackerearth.com/"))) {
                    System.out.println("crawling  " + str);

                    //retriving response of current url as document
                    Document doc = Jsoup.connect(str).timeout(0).userAgent(
                            "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0")
                            .referrer("http://www.google.com").ignoreHttpErrors(true).get();
                    //retriving all urls from current page
                    Elements links = doc.select("a[href]");

                    //mark url as crawled
                    visited.put(str, 1);

                    //mark flag as url is crawled
                    flag = 1;
                    //retrive all urls
                    for (Element link : links) {
                        if (link.absUrl("href").endsWith("/tutorial/")) {
                            tutorialset.add(link.absUrl("href"));
                        }
                        //check if url is problem url then add it in problemurlset
                        if (link.absUrl("href").startsWith("https://www.hackerearth.com/")
                                && isProblemUrl(link.absUrl("href"))) {
                            problemset.add(link.absUrl("href"));
                        }
                        //check if url has valid domain and it has problem urls or not
                        if (link.absUrl("href").contains(("https://www.hackerearth.com/"))
                                && isCrawlable(link.absUrl("href"))) {
                            //if link is not visited then mark it as uncrawled
                            if (!visited.containsKey(link.absUrl("href"))) {
                                visited.put(link.absUrl("href"), 0);
                            }
                            //add it in tempsetorary set
                            tempset.add(link.absUrl("href"));
                            //System.out.println("\n  base: "+str+" ::: link  : " + link.absUrl("href"));
                        }
                    }
                }
            }
            //if nothing is left to crawl break the loop
            if (flag == 0) {
                break;
            }
            //add all retrieved links to linksset
            linksset.addAll(tempset);
        }

        System.out.println("\n\ntotal problem urls " + problemset.size());

        int i = 0;
        for (String str : problemset) {
            System.out.println("link " + i + " : " + str);
            i++;
        }

    } catch (IOException ex) {
        Logger.getLogger(HackerEarthCrawler.class.getName()).log(Level.SEVERE, null, ex);
    }

    //scrap and store into database
    //for every problem url scrap problem page
    for (String problemUrl : problemset) {

        System.out.println("problemUrl :" + problemUrl);
        try {
            //create problem class to store in database
            Problem problem = new Problem();
            String problemSIOC = "", problemIOC = "";
            String problemTitle = "", problemStatement = "", problemInput = "", problemOutput = "",
                    problemConstraints = "";
            String sampleInput = "", sampleOutput = "";
            String problemExplanation = "";
            //set default timelimit to 1 second
            double problemTimeLimit = 1.0;
            ArrayList<String> tags = new ArrayList<String>();

            //get response for given problem url
            Response response = Jsoup.connect(problemUrl).execute();
            Document doc = response.parse();

            //retrieve problem title from page
            Element elementTitle = doc.getElementsByTag("title").first();
            StringTokenizer stTitle = new StringTokenizer(elementTitle.text(), "|");
            problemTitle = stTitle.nextToken().trim();

            Element content = doc.getElementsByClass("starwars-lab").first();
            problemSIOC = content.text();
            Elements e = content.children();

            //to find problem statement
            String breakloop[] = { "input", "input:", "input :", "input format:", "input format :",
                    "input format", "Input and output", "constraints :", "constraints:", "constraints",
                    "$$Input :$$" };
            flag = 0;
            for (Element p : e) {
                String tempStatement = "";
                for (Element pp : p.getAllElements()) {

                    for (String strbreak : breakloop) {
                        if (StringUtils.equalsIgnoreCase(pp.ownText(), strbreak)) {
                            //System.out.println("strbreak :"+strbreak);

                            tempStatement = p.text().substring(0,
                                    p.text().toLowerCase().indexOf(strbreak.toLowerCase()));
                            // System.out.println("temp "+tempStatement);
                            flag = 1;
                            break;
                        }
                    }
                }

                if (flag == 1) {
                    problemStatement += tempStatement;
                    //remove extra space at end
                    if (tempStatement.length() == 0) {
                        problemStatement = problemStatement.substring(0, problemStatement.length() - 1);
                    }
                    break;
                }
                problemStatement += p.text() + " ";
            }

            System.out.println("problemSIOC :" + problemSIOC);
            System.out.println("problemStatement :" + problemStatement);

            if (problemStatement.length() <= problemSIOC.length()) {
                //remove problem statement from whole text and remove extra spaces at the beginning and the end
                problemIOC = problemSIOC.substring(problemStatement.length()).trim();
            } else {
                problemIOC = "";
            }

            System.out.println("problemIOC :" + problemIOC);

            //keywords for identifying input
            String decideInput[] = { "Input format :", "Input format:", "Input format", "inputformat:",
                    "inputformat :", "inputformat", "input and output", "input :", "input:", "input" };
            //keywords for identifying output
            String decideOutput[] = { "output format :", "output format:", "Output format", "outputformat:",
                    "outputformat :", "outputformat", "output :", "output:", "output" };
            //keywords for identifying constraint
            String decideConstraint[] = { "constraints:", "constraints :", "constraints", "Constraints :",
                    "constraint:", "constraint :", "constraint", "Contraints :" };

            int posin = 0, posoutput = 0, poscon = 0, idxin, idxout, idxcon, flaginput = 0, flagoutput = 0,
                    flagcon = 0, inlen = 0, outlen = 0, conlen = 0;

            //find inputformat position,length of keyword
            for (idxin = 0; idxin < decideInput.length; idxin++) {
                if (StringUtils.containsIgnoreCase(problemIOC, decideInput[idxin])) {

                    posin = problemIOC.toLowerCase().indexOf(decideInput[idxin].toLowerCase());
                    flaginput = 1;
                    inlen = decideInput[idxin].length();

                    //decide it is keyowrd for actucal input or it is "sample input"
                    if (StringUtils.containsIgnoreCase(problemIOC, "sample input")) {
                        if (posin > problemIOC.toLowerCase().indexOf("sample input")) {
                            flaginput = 0;
                            inlen = 0;
                        } else {
                            break;
                        }
                    } else {
                        break;
                    }
                }
            }

            //find outputformat position,length of keyword
            for (idxout = 0; idxout < decideOutput.length; idxout++) {
                if (StringUtils.containsIgnoreCase(problemIOC, decideOutput[idxout])) {
                    posoutput = problemIOC.toLowerCase().indexOf(decideOutput[idxout].toLowerCase());
                    flagoutput = 1;
                    outlen = decideOutput[idxout].length();
                    break;
                }
            }

            //find constraint position,length of keyword
            for (idxcon = 0; idxcon < decideConstraint.length; idxcon++) {
                if (StringUtils.containsIgnoreCase(problemIOC, decideConstraint[idxcon])) {
                    poscon = problemIOC.toLowerCase().indexOf(decideConstraint[idxcon].toLowerCase());
                    flagcon = 1;
                    conlen = decideConstraint[idxcon].length();
                    break;
                }
            }

            System.out.println("input " + flaginput + " " + inlen + " " + posin);
            System.out.println("output " + flagoutput + " " + outlen + " " + posoutput);
            System.out.println("constraint " + flagcon + " " + conlen + " " + poscon);
            //retrieve problem input and output if present in problem page

            //if input format is present
            if (flaginput == 1) {
                //if input keyword is "input and output" and contraint is present in problem page
                if (idxin == 6 && flagcon == 1) {
                    problemInput = problemIOC.substring(inlen, poscon);
                }
                //if input keyword is "input and output" and contraint is not present in problem page
                else if (idxin == 6 && flagcon == 0) {
                    problemInput = problemIOC.substring(inlen);
                }
                //if output format and constraint is present
                else if (flagoutput == 1 && flagcon == 1) {
                    //if constraint is present before input format
                    if (poscon < posin) {
                        problemInput = problemIOC.substring(posin + inlen, posoutput);
                        problemOutput = problemIOC.substring(posoutput + outlen);
                    }
                    //if constraint is present before sample
                    else if (poscon < posoutput) {
                        problemInput = problemIOC.substring(inlen, poscon);
                        problemOutput = problemIOC.substring(posoutput + outlen);
                    } else {
                        problemInput = problemIOC.substring(inlen, posoutput);
                        problemOutput = problemIOC.substring(posoutput + outlen, poscon);
                    }
                }
                //if constraint is not present
                else if (flagoutput == 1 && flagcon == 0) {
                    problemInput = problemIOC.substring(inlen, posoutput);
                    problemOutput = problemIOC.substring(posoutput + outlen);
                } else if (flagoutput == 0 && flagcon == 1) {
                    if (poscon < posin) {
                        problemInput = problemIOC.substring(posin + inlen);
                    } else {
                        problemInput = problemIOC.substring(poscon + conlen, posin);
                    }
                    problemOutput = "";
                } else {
                    problemInput = problemIOC.substring(inlen);
                    problemOutput = "";
                }
            }
            //if input format and output format is not present
            else {
                problemInput = "";
                problemOutput = "";
            }

            //if constraint is present
            if (flagcon == 1) {
                //if constraint is present before input format
                if (poscon < posin) {
                    problemConstraints = problemIOC.substring(0, posin);
                }
                //if constraint is present before output format
                else if (poscon < posoutput) {
                    problemConstraints = problemIOC.substring(poscon + conlen, posoutput);
                } else {
                    problemConstraints = problemIOC.substring(poscon + conlen);
                }
            }

            System.out.println("problemInput :" + problemInput);
            System.out.println("problemOutput :" + problemOutput);
            System.out.println("problemConstraints :" + problemConstraints);

            //retrieve problem tags from problem page
            Element elementtag = doc.getElementsByClass("problem-tags").first().child(1);
            StringTokenizer st = new StringTokenizer(elementtag.text(), ",");
            while (st.hasMoreTokens()) {
                tags.add(st.nextToken().trim());
            }

            //retrieve sample input sample output if present
            Element elementSIO = doc.getElementsByClass("input-output-container").first();
            //if sample input output is present
            if (elementSIO != null) {
                //find position of sample output
                int soutpos = elementSIO.text().indexOf("SAMPLE OUTPUT");
                sampleInput = elementSIO.text().substring(12, soutpos);
                sampleOutput = elementSIO.text().substring(soutpos + 13);
                System.out.println("Sample input :\n" + sampleInput + "\n\n\n");
                System.out.println("Sample Output :\n" + sampleOutput);
            } else {
                sampleInput = "";
                sampleOutput = "";
            }

            //retrieve problem explanation from problem page if present
            Element elementExplanation = doc.getElementsByClass("standard-margin").first().child(0);
            if (elementExplanation.text().toLowerCase().contains("explanation")) {
                problemExplanation = elementExplanation.nextElementSibling().text();
            }
            System.out.println("Explanation :" + problemExplanation);

            //retrieve timelimit
            Element elementTL = doc.getElementsByClass("problem-guidelines").first().child(0).child(1);
            StringTokenizer stTL = new StringTokenizer(elementTL.ownText(), " ");
            problemTimeLimit = Double.parseDouble(stTL.nextToken());

            //System.out.println("problemTimeLimit :"+problemTimeLimit);
            //set all retrieved information to problem class
            problem.setProblemUrl(problemUrl);
            if (problemTitle.length() == 0) {
                problemTitle = null;
            }
            if (problemStatement.length() == 0) {
                problemStatement = null;
            }
            if (problemInput.length() == 0) {
                problemInput = null;
            }
            if (problemOutput.length() == 0) {
                problemOutput = null;
            }
            if (problemExplanation.length() == 0) {
                problemExplanation = null;
            }
            if (problemConstraints.length() == 0) {
                problemConstraints = null;
            }
            problem.setTitle(problemTitle);
            problem.setProblemUrl(problemUrl);
            problem.setProblemStatement(problemStatement);
            problem.setInputFormat(problemInput);
            problem.setOutputFormat(problemOutput);
            problem.setTimeLimit(problemTimeLimit);
            problem.setExplanation(problemExplanation);
            problem.setConstraints(problemConstraints);

            //set sample input output to problem class
            SampleInputOutput sampleInputOutput = new SampleInputOutput(problem, sampleInput, sampleOutput);
            problem.getSampleInputOutputs().add(sampleInputOutput);
            //set platform as hackerearth
            problem.setPlatform(Platform.HackerEarth);
            for (String strtag : tags) {
                problem.getTags().add(strtag);
            }

            //store in database
            Session session = null;
            Transaction transaction = null;
            try {
                //start session
                session = HibernateUtil.getSessionFactory().openSession();
                transaction = session.beginTransaction();

                //check if problem is already stored in database
                String hql = "FROM Problem p where p.problemUrl = :problem_url";
                Problem oldProblem = (Problem) session.createQuery(hql).setString("problem_url", problemUrl)
                        .uniqueResult();
                String task;

                //if problem is present in database
                if (oldProblem != null) {
                    //update the old problem
                    task = "updated";
                    //retrieve id of old problem
                    problem.setId(oldProblem.getId());
                    session.delete(oldProblem);
                    session.flush();
                    session.save(problem);
                } else {
                    task = "saved";
                    session.save(problem);
                }

                transaction.commit();
                //log the info to console
                Logger.getLogger(CodeForcesCrawler.class.getName()).log(Level.INFO, "{0} {1}",
                        new Object[] { task, problem.getProblemUrl() });
            } catch (HibernateException ee) {
                if (transaction != null) {
                    transaction.rollback();
                }
                Logger.getLogger(CodeForcesCrawler.class.getName()).log(Level.SEVERE,
                        "Cannot Insert/Update problem into databse: " + problemUrl, e);
            } finally {
                //close the session
                if (session != null) {
                    session.close();
                }
            }
        } catch (Exception ee) {
            System.out.println(ee.toString());
        }
    }

    System.out.println("\n\n\n\ntutorial urls\n\n");
    try {

        for (String tutorialurl : tutorialset) {
            //System.out.println(tutorialurl+"\n\n");
            Response tutorialres = Jsoup.connect(tutorialurl).execute();
            Document doc = tutorialres.parse();

            Tutorial tutorial = new Tutorial();
            tutorial.setContent(doc.getElementsByClass("tutorial").first().text());

            tutorial.setName(baseUrl);
            tutorialurl = tutorialurl.substring(0, tutorialurl.length() - 10);
            StringTokenizer tutorialtok = new StringTokenizer(tutorialurl, "/");

            String tempstr = "";
            while (tutorialtok.hasMoreTokens()) {
                tempstr = tutorialtok.nextToken();
            }

            Session session = null;
            Transaction transaction = null;
            try {
                //start session
                session = HibernateUtil.getSessionFactory().openSession();
                transaction = session.beginTransaction();

                //check if a tutorial with this name is already stored in database
                String hql = "FROM Tutorial p where p.name = :name";
                Tutorial oldProblem = (Tutorial) session.createQuery(hql).setString("name", tempstr)
                        .uniqueResult();
                String task;

                //if a tutorial with this name is present in database
                if (oldProblem != null) {
                    //replace the old tutorial: delete it, then save the freshly crawled one
                    task = "updated";
                    //carry over the name of the old tutorial
                    tutorial.setName(oldProblem.getName());
                    session.delete(oldProblem);
                    session.flush();
                    session.save(tutorial);
                } else {
                    task = "saved";
                    tutorial.setName(tempstr);
                    session.save(tutorial);
                }

                transaction.commit();
                //log the info to console
                Logger.getLogger(CodeForcesCrawler.class.getName()).log(Level.INFO, "{0} {1}",
                        new Object[] { task, tutorial.getName() });
            } catch (HibernateException ee) {
                if (transaction != null) {
                    transaction.rollback();
                }
                Logger.getLogger(CodeForcesCrawler.class.getName()).log(Level.SEVERE,
                        "Cannot Insert/Update problem into databse: " + tempstr, ee);
            } finally {
                //close the session
                if (session != null) {
                    session.close();
                }
            }

        }
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
}