Example usage for org.jsoup.nodes Element select

List of usage examples for org.jsoup.nodes Element select

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.select.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:com.bdx.rainbow.service.etl.analyze.SYJHttpAnalyze.java

/**
 * Builds an entity object from the detail table of a license page.
 *
 * <p>The rows of the first table matched by {@code .listmain table} are walked in order. A row
 * is accepted when its first cell carries {@code nowrap="true"}; the last row is always
 * accepted. The text returned by {@code parseDetailTr} for an accepted row is written to the
 * entity through the method named {@code PREFIX_ATTRIBUTE + n}, where {@code n} starts at 1
 * and advances with every value written. Afterwards the content id (0 when
 * {@code AnalyzeUtil.regex} finds nothing in the seed URL) and the creation time are set.
 *
 * @param seed seed supplying the page HTML and the URL it was fetched from
 * @return the populated entity instance
 * @throws Exception if parsing or one of the helper calls fails
 */
private Object analyzeLicenseDetail(HttpSeed seed) throws Exception {

    Document page = parse(seed.getHtml());

    // Only the first matching table is inspected.
    Elements rows = page.select(".listmain table").get(0).select("tr");

    // Target entity; its class name is derived from the configured table class.
    Object entity = AnalyzeUtil.getInstant(PREFIX_ENTITY_PATH + syjTableBean.getTableClass());

    final int lastIndex = rows.size() - 1;
    int attributeNo = 1;
    for (int index = 0; index <= lastIndex; index++) {
        Element row = rows.get(index);

        // Every row except the last must have nowrap="true" on its first cell.
        if (index != lastIndex) {
            Element firstCell = row.select("td").get(0);
            boolean flagged = firstCell.hasAttr("nowrap") && "true".equals(firstCell.attr("nowrap"));
            if (!flagged) {
                continue;
            }
        }

        String cellText = parseDetailTr(row);

        // NOTE(review): the counter is not advanced here, so for TABLE74 every accepted row
        // after the tenth written value is skipped as well - confirm this is intended.
        if (syjTableBean.getTableClass().equals("TABLE74") && attributeNo == 11) {
            continue;
        }

        AnalyzeUtil.executeMethod(entity, PREFIX_ATTRIBUTE + attributeNo, new Object[] { cellText },
                new Class[] { String.class });
        attributeNo++;
    }

    // Content id taken from the "Id" parameter of the seed URL, 0 when nothing was extracted.
    Object idMatch = AnalyzeUtil.regex(seed.getUrl(), ".+?&Id=(.+?)");
    Long contentId = (idMatch == null) ? Long.valueOf(0L) : Long.valueOf(idMatch.toString());
    AnalyzeUtil.executeMethod(entity, "setContentId", new Object[] { contentId }, new Class[] { Long.class });

    // Creation time is the moment of analysis.
    Timestamp createdAt = new Timestamp(System.currentTimeMillis());
    AnalyzeUtil.executeMethod(entity, "setCreateTime", new Object[] { createdAt },
            new Class[] { Timestamp.class });

    return entity;
}

From source file:me.vertretungsplan.parser.DaVinciParser.java

/**
 * Parses one DaVinci page and adds its content to the given schedule.
 *
 * <p>The page title is either a date (single-day page) or a class name. For a class page the
 * date is looked up in the element following the title; if no date is found there the page is
 * assumed to span several days and no day object is created ({@code day} is {@code null}).
 * Messages and the last-change timestamp are stored on the day when there is one; the
 * timestamp falls back to the schedule otherwise, while messages are dropped because there is
 * no day to attach them to.
 *
 * @param doc      root element of the page to parse
 * @param schedule schedule that receives the parsed day and substitutions
 * @throws IOException declared for callers; propagated from the helpers this method calls
 */
@NotNull
void parsePage(Element doc, SubstitutionSchedule schedule) throws IOException {
    SubstitutionScheduleDay day = new SubstitutionScheduleDay();

    Element titleElem = doc.select("h1.list-table-caption").first();
    if (titleElem == null) {
        // DaVinci 5
        titleElem = doc.select("h2").first();
    }
    String title = titleElem.text();
    String klasse = null;
    // title can either be date or class
    Pattern datePattern = Pattern.compile("\\d+\\.\\d+.\\d{4}");
    Matcher dateMatcher = datePattern.matcher(title);
    if (dateMatcher.find()) {
        day.setDateString(dateMatcher.group());
        day.setDate(ParserUtils.parseDate(dateMatcher.group()));
    } else {
        klasse = title;
        // The title may be the last element of its parent; treat that like "no date found".
        Element nextElem = titleElem.nextElementSibling();
        String nextText = nextElem != null ? nextElem.text() : "";
        if (nextText.matches("\\w+ \\d+\\.\\d+.\\d{4}")) {
            day.setDateString(nextText);
            day.setDate(ParserUtils.parseDate(nextText));
        } else {
            // could not find date, must be multiple days
            day = null;
        }
    }

    // Messages can only be attached to a day; without one there is nowhere to put them.
    if (day != null) {
        for (Element p : doc.select(".row:has(h1.list-table-caption) p")) {
            for (TextNode node : p.textNodes()) {
                String text = node.text().trim();
                if (!text.isEmpty()) {
                    day.addMessage(text);
                }
            }
        }
        for (Element message : doc.select(".callout")) {
            for (TextNode node : message.textNodes()) {
                String text = node.text().trim();
                if (!text.isEmpty()) {
                    day.addMessage(text);
                }
            }
        }
    }

    Element lastChangeElem = doc.select(".row.copyright div").first();
    if (lastChangeElem == null) {
        // DaVinci 5
        lastChangeElem = doc.select("h1").first();
    }
    // Neither element may exist; an empty string simply matches none of the patterns below.
    String lastChange = lastChangeElem != null ? lastChangeElem.ownText() : "";

    LocalDateTime lastChangeTime = null;
    Matcher matcher = Pattern.compile("(\\d{2}-\\d{2}-\\d{4} \\d{2}:\\d{2}) \\|").matcher(lastChange);
    if (matcher.find()) {
        lastChangeTime = DateTimeFormat.forPattern("dd-MM-yyyy HH:mm")
                .parseLocalDateTime(matcher.group(1));
    } else {
        Matcher matcher2 = Pattern.compile("(\\d{2}.\\d{2}.\\d{4} \\| \\d+:\\d{2})").matcher(lastChange);
        if (matcher2.find()) {
            lastChangeTime = DateTimeFormat.forPattern("dd.MM.yyyy | HH:mm")
                    .parseLocalDateTime(matcher2.group(1));
        }
    }
    if (lastChangeTime != null) {
        if (day != null) {
            day.setLastChange(lastChangeTime);
        } else {
            schedule.setLastChange(lastChangeTime);
        }
    }

    if (doc.select(".list-table").size() > 0 || !doc.select(".callout").text().contains("Es liegen keine")) {
        Element table = doc.select(".list-table, table").first();
        parseDaVinciTable(table, schedule, klasse, day, colorProvider);
    }

    if (day != null) {
        schedule.addDay(day);
    }
}

From source file:me.vertretungsplan.parser.UntisInfoParser.java

/**
 * Reads the first table of a timetable page and adds its cells to the schedule's days.
 *
 * <p>One day is prepared per cell of the first table row after the leading cell; a day already
 * present in the schedule with the same date is reused, otherwise a new one is added. The first
 * cell of each body row names the lesson (replaced by a running number when the text is longer
 * than three characters). Cells spanning several rows are tracked so that the column index
 * into the following rows can be corrected.
 *
 * @param v          schedule that receives the last-change time, days and substitutions
 * @param lastChange last-change timestamp text, parsed with {@code ParserUtils.parseDateTime}
 * @param doc        document containing the timetable table
 * @param klasse     class name passed through to {@code parseTimetableCell}
 * @param weekName   date of the first day of the week in {@code d.M.yyyy} format
 * @throws JSONException if the {@code cellFormat} array cannot be read from the configuration
 */
private void parseTimetable(SubstitutionSchedule v, String lastChange, Document doc, String klasse,
        String weekName) throws JSONException {
    v.setLastChange(ParserUtils.parseDateTime(lastChange));
    LocalDate weekStart = DateTimeFormat.forPattern("d.M.yyyy").parseLocalDate(weekName);

    Element table = doc.select("table").first();

    // Header cells after the first one determine how many days the table covers.
    int dayCount = table.select("tr").first().select("td:gt(0)").size();
    List<SubstitutionScheduleDay> days = new ArrayList<>();
    for (int offset = 0; offset < dayCount; offset++) {
        LocalDate date = weekStart.plusDays(offset);

        SubstitutionScheduleDay day = null;
        for (SubstitutionScheduleDay candidate : v.getDays()) {
            if (candidate.getDate().equals(date)) {
                day = candidate;
                break;
            }
        }
        if (day == null) {
            day = new SubstitutionScheduleDay();
            day.setDate(date);
            v.addDay(day);
        }
        days.add(day);
    }

    Elements rows = table.select("> tbody > tr:gt(0)");

    // Lesson name keyed by the index of the row in which the lesson starts.
    Map<Integer, String> lessons = new HashMap<>();
    int lessonCounter = 1;
    for (int rowIdx = 0; rowIdx < rows.size(); lessonCounter++) {
        Element firstCell = rows.get(rowIdx).select("td").first();
        String name = firstCell.text().trim();
        lessons.put(rowIdx, name.length() > 3 ? String.valueOf(lessonCounter) : name);
        rowIdx += getRowspan(firstCell);
    }

    // counts the number of columns that will be missing from each row due to a cell with colspan
    Map<Integer, Integer> columnsToSkip = new HashMap<>();
    for (int rowIdx = 0; rowIdx < rows.size(); rowIdx++) {
        columnsToSkip.put(rowIdx, 0);
    }

    // NOTE(review): the upper bound excludes the last entry of "days" - confirm this is intended.
    for (int col = 1; col < days.size(); col++) {
        SubstitutionScheduleDay day = days.get(col - 1);
        for (int row = 0; row < rows.size();) {
            Element cell = rows.get(row).select("> td").get(col - columnsToSkip.get(row));
            String lesson = getTimetableLesson(cell, row, lessons);

            day.addAllSubstitutions(
                    parseTimetableCell(cell, lesson, klasse, data.getJSONArray("cellFormat"), colorProvider));

            // Rows covered by this cell have one <td> fewer for all later columns.
            int span = getRowspan(cell);
            for (int covered = row + 1; covered < row + span; covered++) {
                columnsToSkip.put(covered, columnsToSkip.get(covered) + 1);
            }

            row += span;
        }
    }
}

From source file:mobi.jenkinsci.ci.client.JenkinsFormAuthHttpClient.java

/**
 * Builds the POST request that submits the login form contained in an HTML response.
 *
 * <p>The form is located by the configured login form id (or the first {@code form} element
 * when no id is configured). All of its {@code input} fields are copied into the request; the
 * fields whose ids match the configured user and password ids get the supplied credentials.
 * When a login button name is configured, only the submit input with that name is included.
 *
 * @param httpContext context of the request that produced {@code response}
 * @param response    response whose body contains the login form
 * @param user        user name to fill in
 * @param password    password to fill in
 * @return the form submission request, URL-encoded as UTF-8
 * @throws IllegalStateException propagated from reading the response entity
 * @throws IOException           if the body cannot be read or contains no matching form
 */
private HttpPost getForm(final HttpContext httpContext, final HttpResponse response, final String user,
        final String password) throws IllegalStateException, IOException {
    final HttpEntity entity = response.getEntity();
    final HttpHost host = (HttpHost) httpContext.getAttribute(ExecutionContext.HTTP_TARGET_HOST);
    final String requestUri = getLatestRedirectedUrl(httpContext);
    final String requestBaseUrl = requestUri.substring(0, requestUri.lastIndexOf('/'));
    final String userFormId = getHtmlElementId(host, FormId.USER);
    final String passFormId = getHtmlElementId(host, FormId.PASS);
    final String loginFormId = getHtmlElementId(host, FormId.LOGIN_FORM);
    final String loginButton = getSsoErrorHandler(host).getSsoLoginButtonName();

    log.debug("Looking for HTML input form retrieved from " + requestUri);

    final List<NameValuePair> formNvps = new ArrayList<NameValuePair>();

    // Close the content stream ourselves so it is released even when parsing fails.
    final Document doc;
    final java.io.InputStream content = entity.getContent();
    try {
        doc = Jsoup.parse(content, "UTF-8", requestBaseUrl);
    } finally {
        content.close();
    }

    final String formSelector = "form" + (loginFormId == null ? "" : "[id=" + loginFormId + "]");
    final org.jsoup.nodes.Element form = doc.select(formSelector).first();
    if (form == null) {
        // Fail with context instead of a bare NullPointerException further down.
        throw new IOException("No HTML form matching '" + formSelector + "' found in page " + requestUri);
    }

    final String formAction = form.attr("action");
    final HttpPost formPost = new HttpPost(getUrl(requestBaseUrl, formAction));
    final Elements formFields = form.select("input");
    for (final Element element : formFields) {
        final String fieldName = element.attr("name");
        String fieldValue = element.attr("value");
        final String fieldId = element.attr("id");

        log.debug(String.format("Processing form field: name='%s' value='%s' id='%s'", fieldName, fieldValue,
                fieldId));

        if (fieldId.equalsIgnoreCase(userFormId)) {
            fieldValue = user;
            log.debug(String.format("Set formField user='%s'", user));
        } else if (fieldId.equalsIgnoreCase(passFormId)) {
            log.debug("Set formField password='*******'");
            fieldValue = password;
        }

        // With a configured login button, every other submit input is left out of the request.
        if (loginButton != null && element.attr("type").equalsIgnoreCase("submit")) {
            if (fieldName.equalsIgnoreCase(loginButton)) {
                formNvps.add(new BasicNameValuePair(fieldName, fieldValue));
            }
        } else {
            formNvps.add(new BasicNameValuePair(fieldName, fieldValue));
        }
    }

    formPost.setEntity(new UrlEncodedFormEntity(formNvps, "UTF-8"));
    return formPost;
}

From source file:de.geeksfactory.opacclient.apis.Littera.java

/**
 * Appends three dropdown search fields describing the available sort criteria.
 *
 * <p>The page at {@code getApiUrl() + "&mode=a"} is fetched and the rows
 * {@code #sort_editor tr.sort_0} to {@code tr.sort_2} are read. Each field is labelled with
 * the text of the row's first cell and starts with an empty entry, followed by the row's
 * {@code .crit option} values; an option marked {@code selected} is inserted at position 0.
 *
 * @param fields list the new fields are appended to
 * @throws IOException   if the page cannot be fetched
 * @throws JSONException declared for callers; not thrown by the code visible here
 */
protected void addSortingSearchFields(List<SearchField> fields) throws IOException, JSONException {
    final Document page = Jsoup.parse(httpGet(getApiUrl() + "&mode=a", getDefaultEncoding()));
    for (int level = 0; level < 3; level++) {
        final String fieldId = "sort_" + level;
        final Element row = page.select("#sort_editor tr." + fieldId).first();

        final DropdownSearchField sortField = new DropdownSearchField();
        sortField.setMeaning(SearchField.Meaning.ORDER);
        sortField.setId(fieldId);
        sortField.setDisplayName(row.select("td").first().text());
        sortField.addDropdownValue("", "");

        for (final Element option : row.select(".crit option")) {
            final boolean preselected = option.hasAttr("selected");
            if (!preselected) {
                sortField.addDropdownValue(option.attr("value"), option.text());
                continue;
            }
            // The preselected criterion goes to the front of the list.
            sortField.addDropdownValue(0, option.attr("value"), option.text());
        }
        fields.add(sortField);
    }
}

From source file:org.epop.dataprovider.googlescholar.GoogleScholarProvider.java

/**
 * Parses one Google Scholar result page into literature entries.
 *
 * <p>Every element matching {@code .gs_r} is treated as one result. Results marked as
 * citations ({@code [C]}) and results without an author line are skipped. When processing a
 * result throws, the result is left out and the exception message is added to
 * {@code malformed}.
 *
 * @param page reader supplying the HTML of the result page
 * @return the entries that could be parsed
 * @throws DatalayerException if the page cannot be read
 */
@Override
protected List<Literature> parsePage(Reader page) throws DatalayerException {
    List<Literature> papers = new ArrayList<Literature>();

    Document doc;
    try {
        StringBuilder builder = new StringBuilder();
        char[] chars = new char[4096];
        int charsRead;
        while ((charsRead = page.read(chars, 0, chars.length)) > 0) {
            builder.append(chars, 0, charsRead);
        }
        doc = Jsoup.parse(builder.toString());
    } catch (IOException e) {
        // Without the page there is nothing to parse; report it instead of failing later
        // with a NullPointerException on the missing document.
        throw new DatalayerException("could not read result page: " + e.getMessage());
    }

    for (Element article : doc.select(".gs_r")) {
        try {

            LiteratureBuilder litBuilder = new LiteratureBuilder();

            // type
            String typeString = article.select(".gs_ct2").text();
            if (typeString.equals("[C]"))
                continue; // skip citations
            litBuilder.setType(getLiteratureType(typeString));

            // title
            String title = replaceLegacyPunctuation(article.select(".gs_rt a").text());
            if (title.isEmpty())
                throw new DatalayerException("title retrieved by parsing is empty");
            litBuilder.setTitle(title);

            // website URL
            if (litBuilder.getWebsiteURLs() == null)
                litBuilder.setWebsiteURLs(new HashSet<Link>());
            try {
                String linkURL = article.select(".gs_rt a").attr("href");
                litBuilder.getWebsiteURLs().add(new Link(linkURL));
            } catch (URISyntaxException e2) {
                // best effort: the entry is kept without this link
                e2.printStackTrace();
            }
            try {
                // cluster link
                String googleLinkURL = "http://scholar.google.com"
                        + article.select(".gs_fl .gs_nph").attr("href");
                litBuilder.getWebsiteURLs().add(new Link("Google Scholar", googleLinkURL));
                // scholar ID
                Matcher idMatcher = ID_PATTERN.matcher(googleLinkURL);
                if (idMatcher.find())
                    litBuilder.setgScholarID(idMatcher.group(1));
                // else
                // TODO error handling
            } catch (URISyntaxException e1) {
                // best effort: the entry is kept without the cluster link and scholar ID
                e1.printStackTrace();
            }

            String abstractText = article.select(".gs_rs").text();
            litBuilder.setAbstractText(abstractText);

            String rawHTML = article.select(".gs_a").html();
            if (rawHTML.isEmpty()) // no authors
                continue;

            // split by " - " (authors - publication, year - publisher)
            String[] splits = rawHTML.split(" - ");
            String namesHTML = "", publicationHTML = "", publisherHTML = "";
            if (splits.length > 0) {
                namesHTML = splits[0].replace(", ", "");
            }
            if (splits.length == 2) {
                // authors - publisher
                publisherHTML = splits[1];
            } else if (splits.length >= 3) {
                // authors - publication, year - publisher (the regular three-section form)
                publicationHTML = splits[1];
                publisherHTML = splits[2];
            }

            // authors
            try {
                List<Author> authors = getAuthorsFromHTMLSection(namesHTML);
                litBuilder.setAuthors(new HashSet<>(authors));
            } catch (PatternMismatchException e1) {
                // best effort: the entry is kept without authors
                e1.printStackTrace();
            }

            // publication
            String[] commaSplit = publicationHTML.split(", ");
            if (commaSplit.length == 2) {
                litBuilder.setPublicationContext(replaceLegacyPunctuation(commaSplit[0]));
                try {
                    Integer year = Integer.parseInt(commaSplit[1]);
                    litBuilder.setYear(year);
                } catch (NumberFormatException ignored) {
                    // year is optional; an unparsable value leaves it unset
                    // TODO (low) logging
                }
            } else {
                // TODO logging/notify user
            }

            // publisher
            litBuilder.setPublisher(publisherHTML);

            // citations
            String citedby = article.select(".gs_fl a[href*=cites]").text();
            Matcher cm = CITES_PATTERN.matcher(citedby);
            try {
                int cites = cm.find() ? Integer.parseInt(cm.group(1)) : 0;
                litBuilder.setgScholarNumCitations(cites);
            } catch (NumberFormatException ignored) {
                // citation count is optional; an unparsable value leaves it unset
            }

            // fulltext
            String fulltextURL = article.select("div.gs_md_wp.gs_ttss a").attr("href");
            Set<Link> fullLinks = new HashSet<>();
            try {
                fullLinks.add(new Link(fulltextURL));
                litBuilder.setFulltextURLs(fullLinks);
            } catch (URISyntaxException e) {
                // best effort: the entry is kept without a fulltext link
                e.printStackTrace();
            }

            papers.add(litBuilder.getObject());

        } catch (Exception e) {
            malformed.add(e.getMessage());
        }
    }

    System.err.println(malformed + " " + malformed.size());

    return papers;
}

/**
 * Replaces dash and ellipsis code points that survive a wrong charset decoding.
 *
 * <p>U+0097 becomes "-" and U+0085 becomes "..." (the Windows-1252 dash and ellipsis bytes
 * read as ISO-8859-1); a proper horizontal ellipsis U+2026 is mapped to "..." as well.
 * NOTE(review): the ellipsis literal was missing from the original code (it replaced the
 * empty string, which inserts "..." between all characters); U+0085 is assumed by analogy
 * with U+0097 - confirm against real page data.
 */
private static String replaceLegacyPunctuation(String text) {
    return text.replace("\u0097", "-").replace("\u0085", "...").replace("\u2026", "...");
}

From source file:org.confab.PhpBB3Parser.java

/**
 * Extracts the posts of a topic page.
 *
 * <p>Posts are the tables inside {@code div#posts} whose id matches {@code post\d+}. For each
 * of them the id (with the "post" prefix removed), the message text and the author's user
 * name are read. The expected page structure is checked with assertions only.
 *
 * @param html   document containing the posts to be parsed
 * @param parent thread handed to the constructor of every created post
 * @return list of Post objects
 */
public List<Post> parsePosts(Document html, ForumThread parent) {
    Utilities.debug("Starting parsePosts");
    List<Post> parsedPosts = new ArrayList<Post>();

    // Each post should have its own table
    Elements postsContainer = html.select("div#posts");
    assert !postsContainer.isEmpty();
    Elements postTables = postsContainer.select("table[id~=(post\\d+)]");
    assert !postTables.isEmpty();

    for (Element postTable : postTables) {
        Post post = new Post(parent);

        // Post id: the table id without its "post" prefix
        String rawId = postTable.attr("id");
        post.id = rawId.replace("post", "").trim();
        assert post.id != null;

        // Post message
        Elements messageNodes = postTable.select("div[id~=(post_message_\\d+)]");
        assert !messageNodes.isEmpty();
        post.message = messageNodes.first().text();
        assert post.message != null;
        Utilities.debug("new_post.message: " + post.message);

        // Post author
        Elements authorNodes = postTable.select(".bigusername");
        assert !authorNodes.isEmpty();
        post.author.username = authorNodes.first().text();
        assert post.author != null;
        Utilities.debug("new_post.author: " + post.author);

        parsedPosts.add(post);
    }

    Utilities.debug("Finished parsePosts");
    return parsedPosts;
}

From source file:qhindex.controller.SearchAuthorWorksController.java

/**
 * Builds a {@code CitingWork} from one search result element.
 *
 * <p>Reads the title and URL from {@code h3.gs_rt > a}, the authors and publisher from the
 * {@code div.gs_a} line ("authors - publisher, year ...") and the citation count from the
 * first link in {@code div.gs_fl}. When the publisher name is truncated (contains an
 * ellipsis), is longer than five characters and a URL is known, the full name is resolved
 * through {@code resolvePublisher}; otherwise the name is cleaned up locally.
 *
 * @param citationElements element holding a single search result
 * @return the citing work; fields whose source element is missing are left unset
 */
private CitingWork extractCitationWork(Element citationElements) {
    CitingWork citingWork = new CitingWork();
    // Extract title(name) and url
    Elements nameElems = citationElements.select("h3.gs_rt > a");
    String urlCitationWork = "";
    if (nameElems.size() > 0) {
        citingWork.setName(nameElems.get(0).text());
        urlCitationWork = nameElems.get(0).attr("href");
        citingWork.setUrl(urlCitationWork);
    }
    // Extract authors, publisher in google/external web/calculated
    Elements publicationElem = citationElements.select("div.gs_a");
    if (publicationElem.size() > 0) {
        String citationData = publicationElem.get(0).text();
        // Get the author data
        citingWork.setAuthors(divideStringAt(citationData, " - ", true));
        // Remove Authors data to extract published in value
        String citationWithoutAuthors = divideStringAt(citationData, " - ", false);
        // Remove the year data
        String publisher = divideStringAt(citationWithoutAuthors, ", ", true);
        citingWork.setPublisherInGoogle(publisher);
        // Assumes that only one ellipsis exists AT THE END to indicate the name is incomplete.
        // NOTE(review): the marker literal was missing from the original code (an empty string,
        // which matches every name); the horizontal ellipsis U+2026 is assumed - confirm.
        final String ellipsis = "\u2026";
        if (publisher.contains(ellipsis) && publisher.length() > 5 && urlCitationWork.length() > 0) {
            try {
                // Remove the ellipsis marker before looking up the full name
                String publisherNameIncompleteWithoutDots = publisher.replace(ellipsis, "");
                String resolvedPublisher = resolvePublisher(urlCitationWork,
                        publisherNameIncompleteWithoutDots);
                citingWork.setPublisherInExternalWeb(resolvedPublisher);
                publisher = handlePublicationNameCases(resolvedPublisher);
            } catch (IOException ioEx) {
                // Keep the truncated name when the lookup fails
                Debug.print("Exception while extracting citing work: " + ioEx.toString());
                resultsMsg += "Exception while extracting citing work.\n";
            }
        } else {
            publisher = removeNonLetterCharsAtBeginning(publisher);
            publisher = handlePublicationNameCases(publisher);
            publisher = correctAuthorWorkPublisher(publisher);
        }
        citingWork.setPublisher(publisher);
    }
    // Extract citation number
    Elements citationElems = citationElements.select("div.gs_fl > a");
    if (citationElems.size() > 0) {
        String citationNumberData = citationElems.get(0).text();
        citationNumberData = citationNumberData.replace("Cited by ", "");
        citingWork.setCitationsNumber(citationNumberData);
    }
    return citingWork;
}

From source file:gov.medicaid.screening.dao.impl.MedicalPracticeLicenseDAOBean.java

/**
 * Parse the License information./*from  w w  w .  j av a 2  s.  c o m*/
 *
 * @param city the license provider city
 * @param details the details page
 * @return the parsed license
 */
private License parseLicense(String city, Document details) {
    License license = new License();
    license.setCity(city);

    ProviderProfile profile = new ProviderProfile();
    license.setProfile(profile);

    String fullName = details.select("#_ctl7_lblName").text();
    User user = new User();
    String[] nameParts = fullName.split(",");
    if (nameParts.length > 0) {
        user.setLastName(nameParts[0].trim());
    }
    if (nameParts.length > 1) {
        user.setFirstName(nameParts[1].trim());
    }
    profile.setUser(user);

    String licenseType = details.select("#_ctl7_ProfileInfoLicense_lblLicType").text();
    LicenseType licType = new LicenseType();
    licType.setName(licenseType);
    license.setType(licType);

    String licenseNo = details.select("#_ctl7_ProfileInfoLicense_lblLicNbr").text();
    license.setLicenseNumber(licenseNo);

    String licensureAddress1 = details.select("#_ctl7_ProfileInfoPublic_lblAddress").text();
    String licensureAddress2 = details.select("#_ctl7_ProfileInfoPublic_lblAddress2").text();
    String licensureCityState = details.select("#_ctl7_ProfileInfoPublic_lblCity").text();
    Address address = new Address();
    address.setLocation(licensureAddress1 + " " + licensureAddress2);
    setCityStateZip(address, licensureCityState);

    String email = details.select("#_ctl7_ProfileInfoPublic_lblEmail").text();
    profile.setContactEmail(email);
    String birthYear = details.select("#_ctl7_ProfileInfoPublic_lblBirthYear").text();
    if (Util.isNotBlank(birthYear)) {
        profile.setDob(new GregorianCalendar(Integer.parseInt(birthYear), Calendar.JANUARY, 1).getTime());
    }

    String gender = details.select("#_ctl7_ProfileInfoPublic_lblGender").text();
    if ("Male".equals(gender)) {
        profile.setSex(Sex.MALE);
    } else if ("Female".equals(gender)) {
        profile.setSex(Sex.FEMALE);
    }

    String expirationDate = details.select("#_ctl7_ProfileInfoLicense_lblExpDate").text();
    String originalIssueDate = details.select("#_ctl7_ProfileInfoLicense_lblGrantDate").text();

    Date issueDate = parseDate(originalIssueDate, DATE_FORMAT);
    if (issueDate != null) {
        license.setOriginalIssueDate(issueDate);
    }

    Date expireDate = parseDate(expirationDate, DATE_FORMAT);
    if (expireDate != null) {
        license.setExpireDate(expireDate);
    }

    String licenseStatus = details.select("#_ctl7_ProfileInfoLicense_lblLicStatus").text();
    LicenseStatus status = new LicenseStatus();
    status.setName(licenseStatus);
    license.setStatus(status);

    String disciplinaryAction = details.select("#_ctl7_ProfileInfoLicense_lblDiscAction").text();
    String correctiveAction = details.select("#_ctl7_ProfileInfoLicense_lblCorrAction").text();

    license.setDiscipline(!"No".equals(disciplinaryAction.trim()));
    license.setCorrectiveAction(!"No".equals(correctiveAction.trim()));

    String medSchool = details.select("#_ctl7_ProfileInfoEducation_lblName").text();
    MedicalSchool medicalSchool = new MedicalSchool();
    medicalSchool.setName(medSchool);
    license.setMedicalSchool(medicalSchool);

    String degree = details.select("#_ctl7_ProfileInfoEducation_lblDegree").text();
    if ("PhD".equals(degree.trim())) {
        profile.setDegree(Degree.DOCTORATE);
    } else if (!Util.isBlank(degree)) {
        profile.setDegree(Degree.MASTER);
    }

    PrivatePractice privatePractice = new PrivatePractice();
    profile.setPrivatePractice(privatePractice);

    String primaryAddressName = details.select("#_ctl7_ProfileInfoPractices_lblPrimaryName").text();
    String primaryAddress1 = details.select("#_ctl7_ProfileInfoPractices_lblPrimaryAddress").text();
    String primaryCityState = details.select("#_ctl7_ProfileInfoPractices_lblPrimaryAddress2").text();
    String primaryPhone = details.select("#_ctl7_ProfileInfoPractices_lblPrimaryPhone").text();
    if (Util.isNotBlank(primaryAddressName) || Util.isNotBlank(primaryAddress1)
            || Util.isNotBlank(primaryCityState)) {
        Address primary = new Address();
        address.setLocation(primaryAddressName + " " + primaryAddress1);
        setCityStateZip(primary, primaryCityState);
        privatePractice.setOfficeAddress(primary);
    }
    privatePractice.setOfficePhoneNumber(primaryPhone);

    String secondaryAddressName = details.select("#_ctl7_ProfileInfoPractices_lblSecondaryName").text();
    String secondaryAddress1 = details.select("#_ctl7_ProfileInfoPractices_lblSecondaryAddress").text();
    String secondaryCityState = details.select("#_ctl7_ProfileInfoPractices_lblSecondaryAddress2").text();
    String secondaryPhone = details.select("#_ctl7_ProfileInfoPractices_lblSecondaryPhone").text();

    if (Util.isNotBlank(secondaryAddressName) || Util.isNotBlank(secondaryAddress1)
            || Util.isNotBlank(secondaryCityState)) {
        Address secondary = new Address();
        address.setLocation(secondaryAddressName + " " + secondaryAddress1);
        setCityStateZip(secondary, secondaryCityState);
        privatePractice.setSecondaryAddress(secondary);
    }

    privatePractice.setSecondaryPhoneNumber(secondaryPhone);

    Elements specialties = details.select("#_ctl7_ProfileInfoSpecialty_dgSpecialty tr:gt(0)");
    List<Specialty> sps = new ArrayList<Specialty>();
    for (Element element : specialties) {
        Specialty sp = new Specialty();
        SpecialtyType spt = new SpecialtyType();
        spt.setName(element.select("td:eq(0)").text());
        sp.setType(spt);
        sp.setName(element.select("td:eq(1)").text());
        sps.add(sp);
    }
    profile.setSpecialties(sps);
    return license;
}

From source file:com.johan.vertretungsplan.parser.UntisCommonParser.java

/**
 * Parst eine Vertretungstabelle eines Untis-Vertretungsplans
 * /*from w  w  w  .  j  a  v  a  2s  . c om*/
 * @param table
 *            das <code>table</code>-Element des HTML-Dokuments, das geparst
 *            werden soll
 * @param data
 *            Daten von der Schule (aus <code>Schule.getData()</code>)
 * @param tag
 *            der {@link VertretungsplanTag} in dem die Vertretungen
 *            gespeichert werden sollen
 * @throws JSONException
 */
protected void parseVertretungsplanTable(Element table, JSONObject data, VertretungsplanTag tag)
        throws JSONException {
    if (data.optBoolean("class_in_extra_line")) {
        for (Element element : table.select("td.inline_header")) {
            String className = getClassName(element.text(), data);
            if (isValidClass(className)) {
                KlassenVertretungsplan kv = new KlassenVertretungsplan(className);

                Element zeile = null;
                try {
                    zeile = element.parent().nextElementSibling();
                    if (zeile.select("td") == null) {
                        zeile = zeile.nextElementSibling();
                    }
                    while (zeile != null && !zeile.select("td").attr("class").equals("list inline_header")) {
                        Vertretung v = new Vertretung();

                        int i = 0;
                        for (Element spalte : zeile.select("td")) {
                            if (!hasData(spalte.text())) {
                                i++;
                                continue;
                            }
                            String type = data.getJSONArray("columns").getString(i);
                            if (type.equals("lesson"))
                                v.setLesson(spalte.text());
                            else if (type.equals("subject"))
                                v.setSubject(spalte.text());
                            else if (type.equals("previousSubject"))
                                v.setPreviousSubject(spalte.text());
                            else if (type.equals("type"))
                                v.setType(spalte.text());
                            else if (type.equals("type-entfall")) {
                                if (spalte.text().equals("x"))
                                    v.setType("Entfall");
                                else
                                    v.setType("Vertretung");
                            } else if (type.equals("room"))
                                v.setRoom(spalte.text());
                            else if (type.equals("teacher"))
                                v.setTeacher(spalte.text());
                            else if (type.equals("previousTeacher"))
                                v.setPreviousTeacher(spalte.text());
                            else if (type.equals("desc"))
                                v.setDesc(spalte.text());
                            else if (type.equals("desc-type")) {
                                v.setDesc(spalte.text());
                                v.setType(recognizeType(spalte.text()));
                            } else if (type.equals("previousRoom"))
                                v.setPreviousRoom(spalte.text());
                            i++;
                        }

                        if (v.getDesc() != null && v.getLesson() == null && v.getPreviousRoom() == null
                                && v.getPreviousSubject() == null && v.getPreviousTeacher() == null
                                && v.getRoom() == null && v.getSubject() == null && v.getTeacher() == null
                                && v.getType() == null) {
                            // Beschreibung aus der letzten Zeile fortgesetzt
                            Vertretung previousVertretung = kv.getVertretung()
                                    .get(kv.getVertretung().size() - 1);
                            previousVertretung.setDesc(previousVertretung.getDesc() + " " + v.getDesc());
                            zeile = zeile.nextElementSibling();
                            continue;
                        }

                        if (v.getType() == null)
                            v.setType("Vertretung");

                        if (!v.getLesson().equals("")) {
                            kv.add(v);
                        }

                        zeile = zeile.nextElementSibling();

                    }
                    tag.getKlassen().put(className, kv);
                } catch (Throwable e) {

                    e.printStackTrace();
                }
            }
        }
    } else {
        boolean hasType = false;
        for (int i = 0; i < data.getJSONArray("columns").length(); i++) {
            if (data.getJSONArray("columns").getString(i).equals("type"))
                hasType = true;
        }
        Vertretung previousVertretung = null;
        for (Element zeile : table.select("tr.list.odd:not(:has(td.inline_header)), "
                + "tr.list.even:not(:has(td.inline_header)), " + "tr:has(td[align=center]:has(font[color]))")) {
            Vertretung v = new Vertretung();
            String klassen = "";
            int i = 0;
            for (Element spalte : zeile.select("td")) {
                if (!hasData(spalte.text())) {
                    i++;
                    continue;
                }
                String type = data.getJSONArray("columns").getString(i);
                if (type.equals("lesson"))
                    v.setLesson(spalte.text());
                else if (type.equals("subject"))
                    v.setSubject(spalte.text());
                else if (type.equals("previousSubject"))
                    v.setPreviousSubject(spalte.text());
                else if (type.equals("type"))
                    v.setType(spalte.text());
                else if (type.equals("type-entfall")) {
                    if (spalte.text().equals("x"))
                        v.setType("Entfall");
                    else if (!hasType)
                        v.setType("Vertretung");
                } else if (type.equals("room"))
                    v.setRoom(spalte.text());
                else if (type.equals("previousRoom"))
                    v.setPreviousRoom(spalte.text());
                else if (type.equals("desc"))
                    v.setDesc(spalte.text());
                else if (type.equals("desc-type")) {
                    v.setDesc(spalte.text());
                    v.setType(recognizeType(spalte.text()));
                } else if (type.equals("teacher"))
                    v.setTeacher(spalte.text());
                else if (type.equals("previousTeacher"))
                    v.setPreviousTeacher(spalte.text());
                else if (type.equals("class"))
                    klassen = getClassName(spalte.text(), data);
                i++;
            }

            if (v.getDesc() != null && v.getLesson() == null && v.getPreviousRoom() == null
                    && v.getPreviousSubject() == null && v.getPreviousTeacher() == null && v.getRoom() == null
                    && v.getSubject() == null && v.getTeacher() == null && v.getType() == null
                    && previousVertretung != null) {
                // Beschreibung aus der letzten Zeile fortgesetzt
                previousVertretung.setDesc(previousVertretung.getDesc() + " " + v.getDesc());
                continue;
            }

            if (v.getType() == null) {
                if (zeile.select("strike").size() > 0 || (v.getSubject() == null && v.getRoom() == null
                        && v.getTeacher() == null && v.getPreviousSubject() != null))
                    v.setType("Entfall");
                else
                    v.setType("Vertretung");
            }

            List<String> affectedClasses;

            // Detect things like "5-12"
            Pattern pattern = Pattern.compile("(\\d+) ?- ?(\\d+)");
            Matcher matcher = pattern.matcher(klassen);
            if (matcher.find()) {
                affectedClasses = new ArrayList<String>();
                int min = Integer.parseInt(matcher.group(1));
                int max = Integer.parseInt(matcher.group(2));
                try {
                    for (String klasse : getAllClasses()) {
                        Pattern pattern2 = Pattern.compile("\\d+");
                        Matcher matcher2 = pattern2.matcher(klasse);
                        if (matcher2.find()) {
                            int num = Integer.parseInt(matcher2.group());
                            if (min <= num && num <= max)
                                affectedClasses.add(klasse);
                        }
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                }
            } else {
                if (data.optBoolean("classes_separated", true)) {
                    affectedClasses = Arrays.asList(klassen.split(", "));
                } else {
                    affectedClasses = new ArrayList<String>();
                    try {
                        for (String klasse : getAllClasses()) { // TODO:
                            // Gibt es
                            // eine
                            // bessere
                            // Mglichkeit?
                            StringBuilder regex = new StringBuilder();
                            for (char character : klasse.toCharArray()) {
                                regex.append(character);
                                regex.append(".*");
                            }
                            if (klassen.matches(regex.toString()))
                                affectedClasses.add(klasse);
                        }
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }

            for (String klasse : affectedClasses) {
                if (isValidClass(klasse)) {
                    KlassenVertretungsplan kv = tag.getKlassen().get(klasse);
                    if (kv == null)
                        kv = new KlassenVertretungsplan(klasse);
                    kv.add(v);
                    tag.getKlassen().put(klasse, kv);
                }
            }
            previousVertretung = v;
        }
    }
    if (data.optBoolean("sort_classes")) {
        List<KlassenVertretungsplan> list = new ArrayList<>(tag.getKlassen().values());
        Collections.sort(list, new Comparator<KlassenVertretungsplan>() {
            @Override
            public int compare(KlassenVertretungsplan o1, KlassenVertretungsplan o2) {
                return o1.getKlasse().compareTo(o2.getKlasse());
            }
        });
        LinkedHashMap<String, KlassenVertretungsplan> hashMap = new LinkedHashMap<>();
        for (KlassenVertretungsplan klasse : list) {
            hashMap.put(klasse.getKlasse(), klasse);
        }
        tag.setKlassen(hashMap);
    }
}