Example usage for org.jsoup.nodes Element select

List of usage examples for org.jsoup.nodes Element select

Introduction

On this page you can find example usages of the select method of org.jsoup.nodes.Element.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:org.bungeni.ext.integration.bungeniportal.BungeniServiceAccess.java

/**
 * Collects the currently selected values of the named form fields.
 *
 * <p>For every element in {@code doc} whose {@code name} attribute equals one of
 * {@code fieldNames}, each element matched by the nested query (the element itself or
 * one of its descendants) that carries a {@code selected} attribute contributes one
 * name/value pair built from the field name and the item's {@code value} attribute.
 *
 * @param doc the parsed document containing the form
 * @param fieldNames names of the form fields to inspect
 * @return one pair per selected item; empty if nothing is selected
 */
private List<BasicNameValuePair> getFormFieldSelectDefaultValues(Document doc, List<String> fieldNames) {
    List<BasicNameValuePair> nvp = new ArrayList<BasicNameValuePair>();
    for (String fieldName : fieldNames) {
        for (Element inputItem : doc.select("[name=" + fieldName + "]")) {
            // "selected" is a boolean HTML attribute: <option selected> and
            // <option selected="selected"> both mean "selected", so match on the
            // attribute's presence instead of on the literal value "selected".
            for (Element selItem : inputItem.select("[selected]")) {
                nvp.add(new BasicNameValuePair(fieldName, selItem.attr("value")));
            }
        }
    }
    return nvp;
}

From source file:gov.medicaid.screening.dao.impl.OIGDAOBean.java

/**
 * Parses the excluded provider profile details page.
 *
 * <p>Each value is read from the {@code td} that directly follows the {@code th}
 * carrying the corresponding label text.
 *
 * @param page the details page
 * @return the parsed provider profile
 * @throws ParsingException if the expected tags were not found
 */
private ProviderProfile parseProfile(Document page) throws ParsingException {
    ProviderProfile profile = new ProviderProfile();

    // name
    User user = new User();
    profile.setUser(user);
    user.setLastName(page.select("th:containsOwn(Last Name) + td").text());
    user.setFirstName(page.select("th:containsOwn(First Name) + td").text());

    // business ("N/A" marks an individual without a business entity)
    String businessName = page.select("th:containsOwn(Entity) + td").text();
    if (!"N/A".equals(businessName)) {
        Business business = new Business();
        profile.setBusiness(business);
        business.setName(businessName);
    }

    // DOB
    Date dob = parseDate(page.select("th:has(acronym:containsOwn(DOB)) + td").text(), DATE_FORMAT);
    if (dob != null) {
        profile.setDob(dob);
    }

    // exclusion type
    ExclusionType exclusionType = new ExclusionType();
    profile.setExclusionType(exclusionType);
    exclusionType.setName(page.select("th:containsOwn(Excl. Type) + td").text());

    // specialty
    List<Specialty> specialties = new ArrayList<Specialty>();
    Specialty specialty = new Specialty();
    specialties.add(specialty);
    specialty.setName(page.select("th:containsOwn(Specialty) + td").text());
    profile.setSpecialties(specialties);

    // address
    Elements addrElement = page.select("th:containsOwn(Address) + td");
    String addr = addrElement.text();
    // The address may continue in the element following the address cell's closest
    // ancestor; that continuation is recognised by an empty header cell. Either
    // element can be absent (no address cell, or address is the last row), so guard
    // against null instead of failing with a NullPointerException.
    Element addrRow = addrElement.parents().first();
    Element addrNextRow = addrRow == null ? null : addrRow.nextElementSibling();
    if (addrNextRow != null && "".equals(addrNextRow.select("th").text())) {
        addr += " " + addrNextRow.select("td").text();
    }
    Address address = new Address();
    address.setLocation(addr);
    profile.setAddresses(Arrays.asList(new Address[] { address }));

    // exclusion date
    Date date = parseDate(page.select("th:containsOwn(Excl. Date) + td").text(), DATE_FORMAT);
    if (date != null) {
        profile.setRequestEffectiveDate(date);
    }

    return profile;
}

From source file:com.bdx.rainbow.service.etl.analyze.SYJHttpAnalyze.java

/**
 * Expands each fetched seed into one list-page seed per result page.
 *
 * <p>For every seed the HTML is parsed, the {@code #pageForm} form is located, and for
 * each page number from 1 to the total page count a new seed is created from the
 * form's action URL and its {@code input} name/value pairs, with {@code curstart} set
 * to the page number.
 *
 * @param seeds the seeds whose HTML should be analysed
 * @return the generated page seeds, or {@code null} if {@code seeds} is empty or any
 *         seed's document has no {@code #pageForm} element
 */
@Override
public Collection<HttpSeed> findPageSeed(Collection<HttpSeed> seeds) throws Exception {
    if (CollectionUtils.isEmpty(seeds)) {
        return null;
    }

    Collection<HttpSeed> pageSeeds = new ArrayList<HttpSeed>();
    for (HttpSeed seed : seeds) {
        Document doc = parse(seed.getHtml());

        // locate the paging form; without it no page URLs can be derived
        Elements forms = doc.select("#pageForm");
        if (forms.isEmpty()) {
            return null;
        }
        Element form = forms.get(0);

        // target URL and the form inputs that are sent with every page request
        String url = DOMAIN + form.attr("action");
        Elements inputs = form.select("input");

        // total number of result pages
        int totalPageNum = this.getTotalPageNum(doc);

        int pageNo = 1;
        while (pageNo <= totalPageNum) {
            // request parameters: all form inputs ...
            Map<String, String> params = new HashMap<String, String>();
            for (Element input : inputs) {
                params.put(input.attr("name"), input.attr("value"));
            }
            // ... plus the page to request
            params.put("curstart", String.valueOf(pageNo));

            pageSeeds.add(this.initListHttpSeed(url, params));
            pageNo++;
        }
    }

    return pageSeeds;
}

From source file:com.crawler.app.run.JellyfishCrawlerSiteVNW.java

/**
 * This function is called when a page is fetched and ready to be processed
 * by your program.
 *
 * <p>Opens the crawler database connection and, for VietnamWorks pages whose URL ends
 * in {@code jd}, {@code jv} or {@code jv/}, extracts the job details from the
 * {@code section#content} element and prints the job title and company contact.
 *
 * @param page the fetched page
 */
@Override
public void visit(Page page) {
    String url = page.getWebURL().getURL();
    String host = "127.0.0.1";
    String port = "3306";
    String dbName = "crawler";
    String dbUser = "root";
    String dbPwd = "";
    MysqlCrawler.createConn(host, port, dbName, dbUser, dbPwd);
    System.out.println("\n URL visit: " + url);

    String href = url.toLowerCase();
    boolean isJobPage = href.startsWith("http://www.vietnamworks.com/")
            && (href.endsWith("jd") || href.endsWith("jv") || href.endsWith("jv/"));

    if (isJobPage && page.getParseData() instanceof HtmlParseData) {
        HtmlParseData htmlParseData = (HtmlParseData) page.getParseData();

        // The second argument of Jsoup.parse(String, String) is the base URI used to
        // resolve relative links, not a charset name, so pass the page URL.
        Document doc = Jsoup.parse(htmlParseData.getHtml(), url);
        Element detail = doc.body().select("section[id=content]").first();

        if (detail == null) {
            // Page layout differs from what the selectors expect; nothing to extract.
            logger.debug("No section[id=content] found in " + url);
        } else {
            String jobName = detail.select("div[class=job-header-info] h1").html();
            String companyContact = detail.select("div[class=col-xs-12 col-md-8 col-lg-8 pull-left] p strong")
                    .html();

            // The following fields are only consumed by the persistence call, which is
            // currently disabled:
            //   MysqlCrawler.getInstance().insertJFHRContents(siteID, jobUrl, jobName, jobLocation,
            //           companyName, companyAddress, companyPhone, companyContact, companyWebsite)
            String companyName = detail.select("span[class=company-name text-lg block] strong").html();
            String companyAddress = detail.select("span[class=company-address block]").html();
            String jobLocation = detail.select("p[class=work-location] span[itemprop=address] a").html();

            System.out.println("\n Title : " + jobName);
            System.out.println("\n Contact : " + companyContact);
        }
    }

    logger.debug("=============");
}

From source file:de.geeksfactory.opacclient.apis.Littera.java

/**
 * Runs a search and parses one page of the result list.
 *
 * @param query the search query parts
 * @param pageIndex the result page to load
 * @return the parsed results together with the total result count and the page index;
 *         the result list is empty when the page contains no result list
 * @throws IOException declared by this method; not thrown directly in this body
 * @throws OpacErrorException declared by this method; not thrown directly in this body
 * @throws JSONException declared by this method; not thrown directly in this body
 */
protected SearchRequestResult executeSearch(List<SearchQuery> query, int pageIndex)
        throws IOException, OpacErrorException, JSONException {
    if (!initialised) {
        start();
    }
    final String searchUrl;
    try {
        searchUrl = buildSearchUrl(query, pageIndex);
    } catch (URISyntaxException e) {
        throw new RuntimeException(e);
    }
    final String html = httpGet(searchUrl, getDefaultEncoding());
    final Document doc = Jsoup.parse(html);

    final Element navigation = doc.select(".result_view .navigation").first();
    final int totalResults = navigation != null ? parseTotalResults(navigation.text()) : 0;

    final List<SearchResult> results = new ArrayList<>();
    final Element ul = doc.select(".result_view ul.list").first();
    if (ul == null) {
        // No result list on the page: report an empty page instead of failing
        // with a NullPointerException.
        return new SearchRequestResult(results, totalResults, pageIndex);
    }

    for (final Element li : ul.children()) {
        if (li.hasClass("zugangsmonat")) {
            // not a result entry
            continue;
        }
        final Element title = li.select(".titelinfo a").first();
        if (title == null) {
            // Entry without a title link carries neither id nor text; skip it.
            continue;
        }
        // Additional details are taken from the element following the title's parent, if any.
        final Element details = title.parent().nextElementSibling();

        final SearchResult result = new SearchResult();
        result.setId(getQueryParamsFirst(title.attr("href")).get("id"));
        result.setInnerhtml(details != null ? title.text() + "<br>" + details.text() : title.text());
        result.setNr(results.size());
        result.setPage(pageIndex);
        result.setType(MEDIA_TYPES.get(li.select(".statusinfo .ma").text()));
        result.setCover(getCover(li));
        final String statusImg = li.select(".status img").attr("src");
        result.setStatus(statusImg.contains("-yes") ? SearchResult.Status.GREEN
                : statusImg.contains("-no") ? SearchResult.Status.RED : null);
        results.add(result);
    }
    return new SearchRequestResult(results, totalResults, pageIndex);
}

From source file:gov.medicaid.screening.dao.impl.ChiropracticLicenseDAOBean.java

/**
 * Parses the Chiropractic license details page.
 * /*w  w  w.j  a v a2  s . co  m*/
 * @param page
 *            the details page
 * @param licenseType
 *            if user has multiple licenses, this one will be used
 * @return the parsed license details
 * @throws ParsingException
 *             if the expected tags were not found
 */
private License parseLicense(Document page, String licenseType) throws ParsingException {
    License license = new License();
    ProviderProfile profile = new ProviderProfile();
    license.setProfile(profile);

    User user = new User();
    profile.setUser(user);
    Elements tables = page.select("table");
    for (Element cell : tables.get(0).select("td")) {
        if (cell.text().equals("First Name")) {
            user.setFirstName(cell.nextElementSibling().text());
        } else if (cell.text().equals("Middle Name")) {
            user.setMiddleName(cell.nextElementSibling().text());
        } else if (cell.text().equals("Last Name")) {
            user.setLastName(cell.nextElementSibling().text());
        } else if (cell.text().equals("Gender")) {
            String gender = cell.nextElementSibling().text();
            if (Util.isNotBlank(gender)) {
                if ("M".equalsIgnoreCase(gender)) {
                    profile.setSex(Sex.MALE);
                } else {
                    profile.setSex(Sex.FEMALE);
                }
            }
        }
    }

    List<Address> addresses = new ArrayList<Address>();
    Address address = new Address();
    addresses.add(address);
    profile.setAddresses(addresses);
    StringBuffer locBuffer = new StringBuffer();
    for (Element cell : tables.get(1).select("td")) {
        if (cell.text().equals("Address Line1")) {
            locBuffer.insert(0, cell.nextElementSibling().text() + " ");
        } else if (cell.text().equals("Address Line2")) {
            locBuffer.append(cell.nextElementSibling().text());
        } else if (cell.text().equals("City")) {
            address.setCity(cell.nextElementSibling().text());
        } else if (cell.text().equals("State")) {
            address.setState(cell.nextElementSibling().text());
        } else if (cell.text().equals("ZIP")) {
            address.setZipcode(cell.nextElementSibling().text());
        } else if (cell.text().equals("Phone Number")) {
            profile.setContactPhoneNumber(cell.nextElementSibling().text());
        }
    }
    address.setLocation(locBuffer.toString().trim());

    for (Element row : tables.get(2).select("tr")) {
        String lType = row.select("td:eq(0)").text();
        if (licenseType != null && !lType.startsWith(licenseType)) {
            // user has multiple licenses, the results will show this user twice (search by name)
            continue;
        }

        LicenseType type = new LicenseType();
        type.setName(row.select("td:eq(0)").text());
        license.setType(type);
        license.setLicenseNumber(row.select("td:eq(1)").text());

        LicenseStatus status = new LicenseStatus();
        status.setName(row.select("td:eq(2)").text());
        license.setStatus(status);

        String issueDate = row.select("td:eq(3)").text();
        if (Util.isNotBlank(issueDate)) {
            license.setOriginalIssueDate(parseDate(issueDate, DATE_FORMAT));
        }

        String renewalDate = row.select("td:eq(4)").text();
        if (Util.isNotBlank(renewalDate)) {
            license.setRenewalDate(parseDate(renewalDate, DATE_FORMAT));
        }

        String expirationDate = row.select("td:eq(5)").text();
        if (Util.isNotBlank(expirationDate)) {
            license.setExpireDate(parseDate(expirationDate, DATE_FORMAT));
        }
    }
    return license;
}

From source file:net.kevxu.purdueassist.course.CatalogDetail.java

/**
 * Parses a course detail page into a {@link CatalogDetailEntry} for the current
 * {@code subject} and {@code cnbr}.
 *
 * <p>The details are read from the table whose {@code summary} attribute is
 * "This table lists the course detail for the selected term."; most fields are cut
 * out of the flattened text of its {@code .ntdefault} element using the positions of
 * fixed labels such as "Levels:" or "Offered By:".
 *
 * <p>A {@link StringIndexOutOfBoundsException} raised while cutting the text is
 * swallowed; in that case the entry is returned with only the fields that were set
 * before the failure.
 *
 * @param document the parsed course detail page
 * @return the (possibly partially) populated entry
 * @throws HtmlParseException if the text of a schedule type link cannot be mapped to a {@code Type}
 * @throws CourseNotFoundException if the page contains no course detail table
 * @throws IOException declared by this method; not thrown directly in this body
 */
private CatalogDetailEntry parseDocument(Document document)
        throws HtmlParseException, CourseNotFoundException, IOException {
    CatalogDetailEntry entry = new CatalogDetailEntry(subject, cnbr);
    Elements tableElements = document.getElementsByAttributeValue("summary",
            "This table lists the course detail for the selected term.");
    if (tableElements.isEmpty() != true) {
        // get name
        try {
            Element body = tableElements.first().select("tbody").first();
            String nameBlock = body.select("tr td.nttitle").first().text();
            // the name is what follows "<subject> <cnbr>" in the title cell
            String[] temp = nameBlock.split(subject.name() + " " + String.valueOf(cnbr));
            // skips the first 3 characters after the course code - presumably a " - " separator; TODO confirm
            String name = temp[temp.length - 1].substring(3);
            entry.setName(name);

            // get description
            body = body.select(".ntdefault").first();
            String text = body.text();
            int split = text.indexOf("Levels:");
            // description is everything before the "Levels:" label ...
            String description = text.substring(0, split);
            // ... minus its first 20 characters - presumably a fixed-length prefix; TODO confirm
            description = description.substring(20);
            entry.setDescription(description);

            // get levels
            int begin = split;
            int end = text.indexOf("Schedule Types:");
            // + 8 skips the 7-character label "Levels:" and the character after it
            String levels = text.substring(begin + 8, end);
            temp = levels.split("[ ,]");
            List<String> lvs = new ArrayList<String>();
            for (String s : temp)
                if (!s.equals("")) {
                    lvs.add(s);
                }
            entry.setLevels(lvs);

            // get type and prerequisites
            List<Type> types = new ArrayList<Type>();
            List<String> preq = new ArrayList<String>();
            Elements parsing_A = body.select("a");
            for (Element e : parsing_A) {
                // links whose href contains "schd_in" (and no '%') are read as schedule types
                if (e.attr("href").contains("schd_in") && !(e.attr("href").contains("%"))) {

                    try {
                        // the link text without spaces must be the name of a Type constant
                        types.add(Type.valueOf(e.text().replace(" ", "")));
                    } catch (Exception exception) {
                        // NOTE(review): the original cause is not passed on to HtmlParseException
                        throw new HtmlParseException();
                    }
                } else if (e.attr("href").contains("sel_attr=")) {
                    // links whose href contains "sel_attr=" are stored as prerequisites
                    preq.add(e.text());
                }
            }
            if (types.size() > 0)
                entry.setType(types);
            if (preq.size() > 0)
                entry.setPrerequisites(preq);

            // get offered by
            // value ends at "Department:" or, if that label is missing, at "Course Attributes:"
            begin = text.indexOf("Offered By:");
            end = text.indexOf("Department:");
            if (end < 0)
                end = text.indexOf("Course Attributes:");
            // NOTE(review): begin is not checked for -1 here; if "Offered By:" is missing
            // the substring starts at index 11 of the text
            if (end > 0) {
                // + 12 skips the 11-character label and the character after it;
                // end - 1 drops the character before the next label
                entry.setOfferedBy(text.substring(begin + 12, end - 1));
            }

            // get department
            begin = text.indexOf("Department:");
            if (begin > 0) {
                end = text.indexOf("Course Attributes:");
                // NOTE(review): if "Course Attributes:" is missing, end is -1 and substring
                // throws StringIndexOutOfBoundsException, which the catch below swallows -
                // campuses and restrictions are then never set
                entry.setDepartment((text.substring(begin + 12, end - 1)));
            }

            // get campus
            begin = text.indexOf("May be offered at any of the following campuses:");
            String campuses;
            // the campus list ends at the first of these labels that is present
            end = text.indexOf("Repeatable for Additional Credit:");
            if (end < 0)
                end = text.indexOf("Learning Objectives:");
            if (end < 0)
                end = text.indexOf("Restrictions:");
            if (end < 0)
                end = text.indexOf("Corequisites:");
            if (end < 0)
                end = text.indexOf("Prerequisites:");
            // NOTE(review): begin is not checked for -1; + 5 skips five more characters
            // after the label - presumably padding; TODO confirm
            if (end < 0) {
                campuses = text
                        .substring(begin + "May be offered at any of the following campuses:".length() + 5);
            } else {
                campuses = text.substring(
                        begin + "May be offered at any of the following campuses:".length() + 5, end - 1);
            }
            // campuses are separated by runs of four spaces; entries of length <= 1 are dropped
            temp = campuses.replace("    ", "#").split("#");
            List<String> camps = new ArrayList<String>();
            for (String s : temp) {
                if (s.length() > 1) {
                    camps.add(s);
                }

            }
            entry.setCampuses(camps);

            // get restrictions
            // the restrictions run up to "Corequisites:" / "Prerequisites:", or to the
            // end of the text if neither label is present; runs of six spaces become line breaks
            begin = text.indexOf("Restrictions:");
            end = text.indexOf("Corequisites:");
            if (end < 0)
                end = text.indexOf("Prerequisites:");
            if (begin > 0 && end < 0) {
                entry.setRestrictions(
                        text.substring(begin + "Restrictions:".length()).replace("      ", "\n"));
            } else if (begin > 0) {
                entry.setRestrictions(
                        text.substring(begin + "Restrictions:".length(), end).replace("      ", "\n"));
            }

        } catch (StringIndexOutOfBoundsException e) {
            // Deliberately ignored: an expected label was missing (per the original
            // note: "no type, not available"); the entry keeps the fields parsed so far.
            // System.out.println("-----------");
            // System.out.println("Error for cnbr = " + cnbr);
            // System.out.println("-----------");
        }
    } else {
        throw new CourseNotFoundException();
    }

    return entry;
}

From source file:com.gumtreescraper.scraper.GumtreeScraper.java

/**
 * Collects the URLs of owner ads from a result list, following the "next" link from
 * page to page.
 *
 * <p>Paging stops when a page has no "next" link, when the date of the last ad on a
 * page indicates (via {@code needToScrapeNextPage}) that older pages are not needed,
 * or when the same page has failed several times in a row.
 *
 * @param gumtrees the list the found ads are appended to
 * @param url the URL of the first result page
 * @throws IOException declared by this method; failures while loading or parsing a
 *         page are caught and printed, not propagated
 */
public void scrapeWithJSoup(List<Gumtree> gumtrees, String url) throws IOException {
    // A failing page is retried, but only a limited number of times; previously a
    // permanently failing URL was retried forever without any delay.
    final int maxConsecutiveFailures = 3;
    int consecutiveFailures = 0;

    String nextPageUrl = url;
    boolean needContinue = true;
    do {
        try {
            Document doc = Jsoup.connect(nextPageUrl).timeout(getTimeout() * 1000).userAgent("Mozilla")
                    .get();
            Elements adElements = doc.select("#srchrslt-adtable > li");
            int size = adElements.size();
            for (int i = 0; i < size; i++) {
                Element ad = adElements.get(i);

                if (!isOwner(ad)) {
                    continue;
                }

                Element linkElement = ad.select("h6.rs-ad-title > a").first();
                if (linkElement == null) {
                    // unexpected markup: dump the ad for diagnosis and move on
                    System.out.print(ad);
                    continue;
                }

                Gumtree gumtree = new Gumtree();
                gumtree.setUrl(BASE_URL + linkElement.attr("href"));
                gumtrees.add(gumtree);

                if (i == size - 1) { // last element
                    // the date of the last ad on the page decides whether to go on
                    Elements adDateElements = ad.select("div.rs-ad-date");
                    if (!adDateElements.isEmpty()
                            && !needToScrapeNextPage(adDateElements.first().text().trim())) {
                        needContinue = false;
                    }
                }
            }

            Elements nextElements = doc.select("a.rs-paginator-btn.next");
            if (nextElements.isEmpty()) {
                break;
            }

            nextPageUrl = BASE_URL + nextElements.first().attr("href");
            System.out.println("next page: " + nextPageUrl);
            // the page was processed completely, so the failure streak is over
            consecutiveFailures = 0;
        } catch (Exception oex) {
            System.out.println(oex);
            consecutiveFailures++;
            if (consecutiveFailures >= maxConsecutiveFailures) {
                break;
            }
        }
    } while (needContinue);
}

From source file:me.vertretungsplan.parser.IndiwareParser.java

/**
 * Parses one day of an Indiware substitution schedule.
 *
 * <p>The same logic handles both export formats; the format-specific element lookup
 * is delegated to an {@code HTMLDataSource} or {@code XMLDataSource}. In HTML mode
 * the meaning of a cell is derived from the class name of the column header at the
 * same position, in XML mode from the tag name of the cell itself.
 *
 * @param doc root element of the schedule document
 * @param html {@code true} if {@code doc} is the HTML export, {@code false} for XML
 * @return the parsed day with its date, last change, messages and substitutions
 * @throws IOException if the title does not contain a date in the expected form
 */
SubstitutionScheduleDay parseIndiwareDay(Element doc, boolean html) throws IOException {
    SubstitutionScheduleDay day = new SubstitutionScheduleDay();

    DataSource ds;
    if (html) {
        ds = new HTMLDataSource(doc);
    } else {
        ds = new XMLDataSource(doc);
    }

    Matcher matcher = datePattern.matcher(ds.titel().text());
    if (!matcher.find())
        throw new IOException("malformed date: " + ds.titel().text());
    String date = matcher.group();
    day.setDate(
            DateTimeFormat.forPattern("EEEE, dd. MMMM yyyy").withLocale(Locale.GERMAN).parseLocalDate(date));

    String lastChange = ds.datum().text();
    day.setLastChange(DateTimeFormat.forPattern("dd.MM.yyyy, HH:mm").withLocale(Locale.GERMAN)
            .parseLocalDateTime(lastChange));

    // header information: one message per entry, prefixed with its bold title
    for (Element kopfinfo : ds.kopfinfos()) {
        String title;
        if (html) {
            title = kopfinfo.select("th").text();
        } else {
            // Append the colon only to an existing title. Previously the colon was
            // concatenated before the null check, so a null title would have been
            // rendered as "null:".
            String xmlTitle = kopfinfoTitle(kopfinfo.tagName());
            title = xmlTitle != null ? xmlTitle + ":" : null;
        }

        StringBuilder message = new StringBuilder();
        if (title != null && !title.isEmpty()) {
            message.append("<b>").append(title).append("</b>").append(" ");
        }
        message.append(html ? kopfinfo.select("td").text() : kopfinfo.text());

        day.addMessage(message.toString());
    }

    // footer: all footer lines joined into a single message
    if (ds.fuss() != null) {
        StringBuilder message = new StringBuilder();
        boolean first = true;
        for (Element fusszeile : ds.fusszeilen()) {
            if (first) {
                first = false;
            } else {
                message.append("\n");
            }
            message.append(fusszeile.text());
        }
        day.addMessage(message.toString());
    }

    // HTML only: column types are derived from the header cells' class names
    List<String> columnTypes = null;
    if (html) {
        columnTypes = new ArrayList<>();
        for (Element th : ((HTMLDataSource) ds).headers()) {
            columnTypes.add(th.className().replace("thplan", "").replace("thlplan", ""));
        }
    }

    for (Element aktion : ds.aktionen()) {
        Substitution substitution = new Substitution();
        String type = "Vertretung";
        String course = null;
        int i = 0;
        for (Element info : aktion.children()) {
            String value = info.text().replace("\u00a0", "");
            if (value.equals("---")) {
                // placeholder for "no value"
                i++;
                continue;
            }
            final String columnType = html ? columnTypes.get(i) : info.tagName();
            switch (columnType) {
            case "klasse":
                Set<String> classes = new HashSet<>();
                for (String klasse : value.split(",")) {
                    Matcher courseMatcher = coursePattern.matcher(klasse);
                    if (courseMatcher.matches()) {
                        classes.add(courseMatcher.group(1));
                        course = courseMatcher.group(2);
                    } else {
                        classes.add(klasse);
                    }
                }
                substitution.setClasses(classes);
                break;
            case "stunde":
                substitution.setLesson(value);
                break;
            case "fach":
                // if a separate "vfach" column exists, "fach" holds the previous subject
                String subject = subjectAndCourse(course, value);
                if (columnTypes != null && columnTypes.contains("vfach")) {
                    substitution.setPreviousSubject(subject);
                } else {
                    substitution.setSubject(subject);
                }
                break;
            case "vfach":
                substitution.setSubject(subjectAndCourse(course, value));
                // Without this break the subject value fell through into the "lehrer"
                // case and was stored as the teacher as well.
                break;
            case "lehrer":
                Matcher bracesMatcher = bracesPattern.matcher(value);
                if (bracesMatcher.matches())
                    value = bracesMatcher.group(1);
                substitution.setTeacher(value);
                break;
            case "raum":
                // if a separate "vraum" column exists, "raum" holds the previous room
                if (columnTypes != null && columnTypes.contains("vraum")) {
                    substitution.setPreviousRoom(value);
                } else {
                    substitution.setRoom(value);
                }
                break;
            case "vraum":
                substitution.setRoom(value);
                // Without this break the room value fell through into the "info" case
                // and was additionally interpreted as the description.
                break;
            case "info":
                Matcher substitutionMatcher = substitutionPattern.matcher(value);
                Matcher cancelMatcher = cancelPattern.matcher(value);
                Matcher delayMatcher = delayPattern.matcher(value);
                Matcher selfMatcher = selfPattern.matcher(value);
                if (substitutionMatcher.matches()) {
                    substitution.setPreviousSubject(substitutionMatcher.group(1));
                    substitution.setPreviousTeacher(substitutionMatcher.group(2));
                    if (!substitutionMatcher.group(3).isEmpty()) {
                        substitution.setDesc(substitutionMatcher.group(3));
                    }
                } else if (cancelMatcher.matches()) {
                    type = "Entfall";
                    substitution.setPreviousSubject(cancelMatcher.group(1));
                    substitution.setPreviousTeacher(cancelMatcher.group(2));
                } else if (delayMatcher.matches()) {
                    type = "Verlegung";
                    substitution.setPreviousSubject(delayMatcher.group(1));
                    substitution.setPreviousTeacher(delayMatcher.group(2));
                    substitution.setDesc(delayMatcher.group(3));
                } else if (selfMatcher.matches()) {
                    type = "selbst.";
                    if (!selfMatcher.group(1).isEmpty())
                        substitution.setDesc(selfMatcher.group(1));
                } else if (value.equals("f\u00e4llt aus") || value.equals("Klausur") || value.equals("Aufg.")) {
                    // These texts are used verbatim as the type. The first literal had
                    // lost its umlaut ("fllt aus") and could never match; it is written
                    // as a Unicode escape to be independent of the source encoding.
                    type = value;
                } else {
                    substitution.setDesc(value);
                }
                break;
            }
            i++;
        }
        substitution.setType(type);
        substitution.setColor(colorProvider.getColor(substitution.getType()));
        // fall back to the course name when no subject column provided a subject
        if (course != null && substitution.getSubject() == null) {
            substitution.setSubject(course);
        }
        day.addSubstitution(substitution);
    }

    return day;
}

From source file:qhindex.controller.SearchAuthorWorksController.java

/**
 * Builds an {@link AuthorWork} from the given element.
 *
 * <p>Title and link come from the first {@code td.gsc_a_t > a}; if at least two
 * {@code td.gsc_a_t > div} elements exist, the first supplies the authors and the
 * second the publisher. The citation URL and count come from the first
 * {@code td.gsc_a_c > a}; a count that is empty or cannot be parsed is stored as 0.
 *
 * @param authorWorkElements the element describing a single work
 * @return the populated work
 * @throws IOException declared by this method; not thrown directly in this body
 */
private AuthorWork extractAuthorWorkData(Element authorWorkElements) throws IOException {
    final AuthorWork work = new AuthorWork();

    final Element titleLink = authorWorkElements.select("td.gsc_a_t > a").get(0);
    work.setTitle(titleLink.text());
    final String workUrl = titleLink.attr("href");

    final Elements detailDivs = authorWorkElements.select("td.gsc_a_t > div");
    if (detailDivs.size() >= 2) {
        final String publisherInGoogle = detailDivs.get(1).text();
        work.setPublisherInGoogle(publisherInGoogle);
        work.setPublisher(handlePublicationMedium(publisherInGoogle, workUrl));
        work.setAuthors(detailDivs.get(0).text());
    }

    final Elements citationLinks = authorWorkElements.select("td.gsc_a_c > a");
    if (citationLinks.isEmpty()) {
        return work;
    }

    final Element citationLink = citationLinks.get(0);
    work.setCitationsUrl(citationLink.attr("href"));

    int citations = 0;
    try {
        final String citationText = citationLink.text();
        if (!citationText.isEmpty()) {
            citations = Integer.parseInt(citationText);
        }
    } catch (Exception ex) {
        // keep the default of 0 and record the problem for the user
        Debug.print("Exception while extracting author work data: " + ex.toString());
        resultsMsg += "Exception while extracting author work data.\n";
    }
    work.setCitations(citations);
    return work;
}