Example usage for org.jsoup.nodes Element attr

List of usage examples for org.jsoup.nodes Element attr

Introduction

On this page you can find usage examples for the attr method of org.jsoup.nodes.Element.

Prototype

public String attr(String attributeKey) 

Source Link

Documentation

Get an attribute's value by its key.

Usage

From source file:org.apache.karaf.cave.server.storage.CaveRepositoryImpl.java

/**
 * Populate the Cave repository using the given URL.
 * <p>
 * When the response content type is {@code application/java-archive} or
 * {@code application/octet-stream} and the URL matches the filter, the payload is copied into
 * the repository storage. Any other response (including one without a Content-Type header) is
 * treated as a browsable page: every link except the first one is followed recursively.
 *
 * @param url    the "source" HTTP URL.
 * @param filter regex filter. Only artifacts URL matching the filter will be considered.
 * @param update true if the OBR metadata should be updated, false else.
 * @throws Exception in case of populate failure.
 */
private void populateFromHttp(String url, String filter, boolean update) throws Exception {
    LOGGER.debug("Populating from HTTP URL {}", url);
    HttpClient httpClient = new DefaultHttpClient();
    try {
        HttpGet httpGet = new HttpGet(url);
        HttpResponse response = httpClient.execute(httpGet);
        HttpEntity entity = response.getEntity();
        if (entity == null) {
            return;
        }

        // The Content-Type header is optional; a missing header must not end in a NullPointerException.
        String contentType = entity.getContentType() != null ? entity.getContentType().getValue() : null;
        if ("application/java-archive".equals(contentType)
                || "application/octet-stream".equals(contentType)) {
            // I have a jar/binary, potentially a resource
            try {
                if ((filter == null) || (url.matches(filter))) {
                    ResourceImpl resource = (ResourceImpl) new DataModelHelperImpl()
                            .createResource(new URL(url));
                    if (resource != null) {
                        LOGGER.debug("Copy {} into the Cave repository storage", url);
                        // keep only the last path segment (with its leading '/') as the file name
                        String fileName = url;
                        int index = url.lastIndexOf("/");
                        if (index > 0) {
                            fileName = url.substring(index);
                        }
                        File destination = new File(new File(this.getLocation()), fileName);
                        FileOutputStream outputStream = new FileOutputStream(destination);
                        try {
                            entity.writeTo(outputStream);
                            outputStream.flush();
                        } finally {
                            // close the file even when the download fails half-way
                            outputStream.close();
                        }
                        if (update) {
                            resource = (ResourceImpl) new DataModelHelperImpl()
                                    .createResource(destination.toURI().toURL());
                            LOGGER.debug("Update OBR metadata with {}", resource.getId());
                            this.addResource(resource);
                        }
                    }
                }
            } catch (IllegalArgumentException e) {
                LOGGER.warn(e.getMessage());
            }
        } else {
            // try to find link to "browse"
            Document document = Jsoup.connect(url).get();

            Elements links = document.select("a");
            // the first link is skipped on purpose (presumably the parent-directory entry)
            for (int i = 1; i < links.size(); i++) {
                String absoluteHref = links.get(i).attr("abs:href");
                if (absoluteHref.isEmpty()) {
                    // jsoup yields "" when the href cannot be resolved to an absolute URL
                    continue;
                }
                this.populateFromHttp(absoluteHref, filter, update);
            }
        }
    } finally {
        // release the connections held by this call's client
        httpClient.getConnectionManager().shutdown();
    }
}

From source file:org.kitesdk.spring.hbase.example.service.WebPageSnapshotService.java

/**
 * Parse the keywords out of the meta tag if one exists. Otherwise, return an
 * empty list./*from w  ww.  ja v  a  2 s  .  c  o  m*/
 *
 * @param doc The Document ot parse
 * @return The list of keywords.
 */
private List<String> getKeywordsFromDocument(Document doc) {
    List<String> keywords = new ArrayList<String>();
    Elements keywordsElements = doc.select("meta[name=keywords]");
    for (Element keywordsElement : keywordsElements) {
        for (String keyword : keywordsElement.attr("content").split(",")) {
            keywords.add(keyword.trim());
        }
    }
    return keywords;
}

From source file:org.confab.PhpBB3Parser.java

/**
* Constructs and submits a POST with the appropriate parameters to login to a vbulletin.
* @param  rootURL     Base or root URL for the site to log into 
* @param  username    User's login name/* w  w  w. ja  v a 2  s . com*/
* @param  password    User's password
* @return             User object initialised with a HttpContext
*/
public User login(String rootURL, String username, String password) {
    Utilities.debug("login");

    User ret = new User(username, password);

    CookieStore cookieStore = new BasicCookieStore();
    HttpContext localContext = new BasicHttpContext();
    localContext.setAttribute(ClientContext.COOKIE_STORE, cookieStore);

    try {
        // set up the POST
        HttpPost httppost = new HttpPost(rootURL + "login.php");
        List<NameValuePair> nvps = new ArrayList<NameValuePair>();
        nvps.add(new BasicNameValuePair("do", "login"));
        nvps.add(new BasicNameValuePair("vb_login_username", username));
        nvps.add(new BasicNameValuePair("vb_login_password", ""));
        nvps.add(new BasicNameValuePair("s", ""));
        nvps.add(new BasicNameValuePair("securitytoken", "guest"));
        nvps.add(new BasicNameValuePair("do", "login"));
        nvps.add(new BasicNameValuePair("vb_login_md5password", Utilities.md5(password)));
        nvps.add(new BasicNameValuePair("vb_login_md5password_utf", Utilities.md5(password)));
        httppost.setEntity(new UrlEncodedFormEntity(nvps, HTTP.UTF_8));

        // execute the POST 
        Utilities.debug("Executing POST");
        HttpResponse response = httpclient.execute(httppost, localContext);
        Utilities.debug("POST response: " + response.getStatusLine());
        assert response.getStatusLine().getStatusCode() == 200;

        //TODO: store the cookies
        //http://bit.ly/e7yY5i (CookieStore javadoc)

        Utilities.printCookieStore(cookieStore);

        // confirm we are logged in 
        HttpGet httpget = new HttpGet(rootURL);
        response = httpclient.execute(httpget, localContext);
        HttpEntity entity = response.getEntity();
        Document page = Jsoup.parse(EntityUtils.toString(entity));
        EntityUtils.consume(entity);
        assert page != null;

        Utilities.debug("Checking that we are logged in..");
        Element username_box = page.select("input[name=vb_login_username]").first();
        assert username_box == null;
        Element password_box = page.select("input[name=vb_login_password]").first();
        assert password_box == null;

        // parse the user's new securitytoken
        Element el_security_token = page.select("input[name=securitytoken]").first();
        assert el_security_token != null;
        String security_token = el_security_token.attr("value");
        assert security_token != null;
        String[] token_array = security_token.split("-");
        assert token_array.length == 2;
        ret.vb_security_token = token_array[1];
        assert ret.vb_security_token.length() == 40;
        Utilities.debug("securitytoken: " + ret.vb_security_token);

        Utilities.debug("Login seems ok");
        ret.httpContext = localContext;
    } catch (IOException e) {
        System.out.println(e);
    }

    Utilities.debug("end login");
    return ret;
}

From source file:me.vertretungsplan.parser.UntisInfoParser.java

/**
 * Logs in, reads the available weeks from the navigation bar and parses the schedule page(s)
 * of each week into one {@link SubstitutionSchedule}.
 * <p>
 * In single-class/timetable mode one page per class is parsed and classes answering with
 * HTTP 500 are skipped. Otherwise one page per week is parsed and a failing week is tolerated
 * as long as at least one week succeeds.
 *
 * @return the schedule filled with the parsed data, classes, teachers and website
 * @throws IOException                on HTTP failures, or when the navigation bar has no
 *                                    {@code select[name=week]} element
 * @throws JSONException              declared for the JSON configuration access
 * @throws CredentialInvalidException presumably raised by the login handler — confirm
 */
@Override
public SubstitutionSchedule getSubstitutionSchedule()
        throws IOException, JSONException, CredentialInvalidException {
    new LoginHandler(scheduleData, credential, cookieProvider).handleLogin(executor, cookieStore);

    Document navbarDoc = Jsoup.parse(getNavbarDoc().replace("&nbsp;", ""));
    Element select = navbarDoc.select("select[name=week]").first();
    if (select == null) {
        // fail with a meaningful error instead of a NullPointerException further down
        throw new IOException("No week selector (select[name=week]) found in navigation bar");
    }

    SubstitutionSchedule v = SubstitutionSchedule.fromData(scheduleData);

    String lastChange = textAfterStandMarker(navbarDoc.select(".description").text());
    if (lastChange == null) {
        // the navigation bar carries no "Stand:" information: try the title frame instead
        try {
            String infoHtml = httpGet(baseUrl + "/frames/title.htm", data.optString(PARAM_ENCODING, null));
            Document infoDoc = Jsoup.parse(infoHtml);
            lastChange = textAfterStandMarker(infoDoc.select(".description").text());
        } catch (Exception e) {
            // best effort only: the schedule is still usable without a last-change text
            lastChange = null;
        }
        if (lastChange == null) {
            lastChange = "";
        }
    }

    // loop-invariant: decides between one page per class and one page per week
    boolean pagePerClass =
            data.optBoolean(PARAM_SINGLE_CLASSES, data.optBoolean("single_classes", false)) // backwards compatibility
                    || data.optString(PARAM_SCHEDULE_TYPE, "substitution").equals("timetable");

    int successfulWeeks = 0;
    HttpResponseException lastException = null;
    for (Element option : select.children()) {
        String week = option.attr("value");
        String weekName = option.text();
        if (pagePerClass) {
            int classNumber = 1;
            for (String klasse : getAllClasses()) {
                String url = getScheduleUrl(week, classNumber, data);
                classNumber++;
                try {
                    parsePage(v, lastChange, klasse, url, weekName);
                } catch (HttpResponseException e) {
                    if (e.getStatusCode() != 500) {
                        throw e;
                    }
                    // HTTP 500 occurs in Hannover_MMBS: skip this class
                }
            }
            successfulWeeks++;
        } else {
            String url = getScheduleUrl(week, 0, data);
            try {
                parsePage(v, lastChange, null, url, weekName);
                successfulWeeks++;
            } catch (HttpResponseException e) {
                lastException = e;
            }
        }
    }
    if (successfulWeeks == 0 && lastException != null) {
        throw lastException;
    }
    v.setClasses(getAllClasses());
    v.setTeachers(getAllTeachers());
    v.setWebsite(baseUrl + "/default.htm");
    return v;
}

/**
 * Returns the trimmed text following the first "Stand:" marker, or {@code null} when the
 * marker does not occur in the given description.
 */
private String textAfterStandMarker(String description) {
    final String marker = "Stand:";
    int start = description.indexOf(marker);
    if (start < 0) {
        return null;
    }
    return description.substring(start + marker.length()).trim();
}

From source file:net.kevxu.purdueassist.course.CatalogDetail.java

/**
 * Builds a {@link CatalogDetailEntry} for this object's {@code subject}/{@code cnbr} from a
 * course detail page.
 * <p>
 * The page is located through the table whose {@code summary} attribute equals
 * "This table lists the course detail for the selected term.". Most fields are cut out of the
 * flattened text of the {@code .ntdefault} cell using the positions of fixed labels
 * ("Levels:", "Schedule Types:", "Offered By:", ...). A
 * {@link StringIndexOutOfBoundsException} raised by any of these cuts is swallowed, in which
 * case the entry is returned with only the fields that were set before the failure.
 *
 * @param document the parsed course detail page
 * @return the entry populated with whatever could be extracted
 * @throws HtmlParseException      if the text of a schedule-type link cannot be mapped to a
 *                                 {@code Type} constant
 * @throws CourseNotFoundException if the page contains no course detail table
 * @throws IOException             declared, but no statement visible in this method throws it
 */
private CatalogDetailEntry parseDocument(Document document)
        throws HtmlParseException, CourseNotFoundException, IOException {
    CatalogDetailEntry entry = new CatalogDetailEntry(subject, cnbr);
    Elements tableElements = document.getElementsByAttributeValue("summary",
            "This table lists the course detail for the selected term.");
    if (tableElements.isEmpty() != true) {
        // get name
        try {
            Element body = tableElements.first().select("tbody").first();
            String nameBlock = body.select("tr td.nttitle").first().text();
            // the name is what follows "<subject> <cnbr>" in the title cell;
            // substring(3) presumably drops a " - " separator — TODO confirm
            String[] temp = nameBlock.split(subject.name() + " " + String.valueOf(cnbr));
            String name = temp[temp.length - 1].substring(3);
            entry.setName(name);

            // get description
            // from here on "body" is the .ntdefault cell and "text" its flattened text
            body = body.select(".ntdefault").first();
            String text = body.text();
            int split = text.indexOf("Levels:");
            String description = text.substring(0, split);
            // NOTE(review): the first 20 characters are dropped unconditionally; the skipped
            // prefix is not visible in this method — confirm against a real page
            description = description.substring(20);
            entry.setDescription(description);

            // get levels
            int begin = split;
            int end = text.indexOf("Schedule Types:");
            // begin + 8 skips the 7-character label "Levels:" plus one more character
            String levels = text.substring(begin + 8, end);
            temp = levels.split("[ ,]");
            List<String> lvs = new ArrayList<String>();
            for (String s : temp)
                if (!s.equals("")) {
                    lvs.add(s);
                }
            entry.setLevels(lvs);

            // get type and prerequisites
            // links whose href contains "schd_in" (and no '%') are schedule types,
            // links whose href contains "sel_attr=" are collected as prerequisites
            List<Type> types = new ArrayList<Type>();
            List<String> preq = new ArrayList<String>();
            Elements parsing_A = body.select("a");
            for (Element e : parsing_A) {
                if (e.attr("href").contains("schd_in") && !(e.attr("href").contains("%"))) {

                    try {
                        types.add(Type.valueOf(e.text().replace(" ", "")));
                    } catch (Exception exception) {
                        throw new HtmlParseException();
                    }
                } else if (e.attr("href").contains("sel_attr=")) {
                    preq.add(e.text());
                }
            }
            if (types.size() > 0)
                entry.setType(types);
            if (preq.size() > 0)
                entry.setPrerequisites(preq);

            // get offered by
            // ends at "Department:" or, if that label is absent, at "Course Attributes:"
            begin = text.indexOf("Offered By:");
            end = text.indexOf("Department:");
            if (end < 0)
                end = text.indexOf("Course Attributes:");
            if (end > 0) {
                entry.setOfferedBy(text.substring(begin + 12, end - 1));
            }

            // get department
            begin = text.indexOf("Department:");
            if (begin > 0) {
                end = text.indexOf("Course Attributes:");
                entry.setDepartment((text.substring(begin + 12, end - 1)));
            }

            // get campus
            // the campus list ends at the first of the following labels that is present
            begin = text.indexOf("May be offered at any of the following campuses:");
            String campuses;
            end = text.indexOf("Repeatable for Additional Credit:");
            if (end < 0)
                end = text.indexOf("Learning Objectives:");
            if (end < 0)
                end = text.indexOf("Restrictions:");
            if (end < 0)
                end = text.indexOf("Corequisites:");
            if (end < 0)
                end = text.indexOf("Prerequisites:");
            if (end < 0) {
                // no closing label at all: the list runs to the end of the text
                campuses = text
                        .substring(begin + "May be offered at any of the following campuses:".length() + 5);
            } else {
                campuses = text.substring(
                        begin + "May be offered at any of the following campuses:".length() + 5, end - 1);
            }
            // campus names are separated by runs of four spaces
            temp = campuses.replace("    ", "#").split("#");
            List<String> camps = new ArrayList<String>();
            for (String s : temp) {
                if (s.length() > 1) {
                    camps.add(s);
                }

            }
            entry.setCampuses(camps);

            // get restrictions
            // runs up to "Corequisites:" / "Prerequisites:" or, if neither exists, to the end
            begin = text.indexOf("Restrictions:");
            end = text.indexOf("Corequisites:");
            if (end < 0)
                end = text.indexOf("Prerequisites:");
            if (begin > 0 && end < 0) {
                entry.setRestrictions(
                        text.substring(begin + "Restrictions:".length()).replace("      ", "\n"));
            } else if (begin > 0) {
                entry.setRestrictions(
                        text.substring(begin + "Restrictions:".length(), end).replace("      ", "\n"));
            }

        } catch (StringIndexOutOfBoundsException e) {
            // deliberately ignored: a missing label aborts the extraction and the entry is
            // returned with the fields gathered so far
            // no type, not available
            // System.out.println("-----------");
            // System.out.println("Error for cnbr = " + cnbr);
            // System.out.println("-----------");
        }
    } else {
        throw new CourseNotFoundException();
    }

    return entry;
}

From source file:com.serphacker.serposcope.scraper.google.scraper.GoogleScraper.java

/**
 * Extracts the target URL from the {@code href} of a result link.
 * <ul>
 * <li>absolute http(s) links are returned unchanged, except links to {@code www.google}
 * hosts containing {@code /aclk?}, which yield {@code null};</li>
 * <li>for relative {@code /url?...} links the first non-null {@code q} query parameter is
 * returned;</li>
 * <li>everything else yields {@code null}.</li>
 * </ul>
 *
 * @param element the link element, may be {@code null}
 * @return the extracted URL, or {@code null} if none could be determined
 */
protected String extractLink(Element element) {
    if (element == null) {
        return null;
    }

    String attr = element.attr("href");
    if (attr == null) {
        return null;
    }

    boolean googleHost = attr.startsWith("http://www.google") || attr.startsWith("https://www.google");
    if (googleHost && attr.contains("/aclk?")) {
        return null;
    }

    if (attr.startsWith("http://") || attr.startsWith("https://")) {
        return attr;
    }

    if (attr.startsWith("/url?")) {
        try {
            List<NameValuePair> params = URLEncodedUtils.parse(attr.substring(5), Charset.forName("utf-8"));
            // Scan the pairs directly: collecting them into a map fails on repeated parameter
            // names and on parameters without a value, which would discard a valid link.
            for (NameValuePair param : params) {
                if ("q".equals(param.getName()) && param.getValue() != null) {
                    return param.getValue();
                }
            }
        } catch (Exception ex) {
            // best effort: an unparsable query string simply yields no link
            return null;
        }
    }

    return null;
}

From source file:com.liato.bankdroid.banking.banks.coop.Coop.java

/**
 * Logs in and rebuilds the account list.
 * <p>
 * For every {@link AccountType} the account page is fetched and scraped for the transaction
 * paging parameters (page GUID, min/max date) and for the balance list. Afterwards the refund
 * summary service is queried and its two figures are appended as additional accounts.
 *
 * @throws BankException       on I/O or JSON failures, or when no account was found
 * @throws LoginException      when the username or password is missing or empty
 * @throws BankChoiceException not thrown directly here; presumably propagated from
 *                             {@code super.update()} or {@code login()} — confirm
 */
@Override
public void update() throws BankException, LoginException, BankChoiceException {
    super.update();
    boolean credentialsMissing = username == null || password == null
            || username.length() == 0 || password.length() == 0;
    if (credentialsMissing) {
        throw new LoginException(res.getText(R.string.invalid_username_password).toString());
    }

    login();

    try {
        for (AccountType type : AccountType.values()) {
            response = urlopen.open(type.getUrl());
            Document page = Jsoup.parse(response);

            // paging parameters for later transaction requests
            Elements history = page.select("#historik section");
            TransactionParams txParams = new TransactionParams();
            mTransactionParams.put(type, txParams);
            if (history != null && !history.isEmpty()) {
                Matcher guid = rePageGuid.matcher(history.first().attr("data-controller"));
                if (guid.find()) {
                    txParams.setPageGuid(guid.group(1));
                }
            }

            Element dateFrom = page.getElementById("dateFrom");
            if (dateFrom != null) {
                String minDate = null;
                if (dateFrom.hasAttr("min")) {
                    minDate = dateFrom.attr("min");
                }
                txParams.setMinDate(minDate);

                String maxDate = null;
                if (dateFrom.hasAttr("max")) {
                    maxDate = dateFrom.attr("max");
                }
                txParams.setMaxDate(maxDate);
            }

            // balance list: <dt> holds the label, <dd> the amount
            Elements balanceLists = page.select(".List:contains(Saldo)");
            if (balanceLists == null || balanceLists.isEmpty()) {
                continue;
            }
            Element balanceList = balanceLists.first();

            List<String> labels = new ArrayList<String>();
            for (Element dt : balanceList.select("dt")) {
                labels.add(dt.text().replaceAll(":", "").trim());
            }
            List<String> amounts = new ArrayList<String>();
            for (Element dd : balanceList.select("dd")) {
                amounts.add(dd.text().trim());
            }

            int pairCount = Math.min(labels.size(), amounts.size());
            for (int i = 0; i < pairCount; i++) {
                String amount = amounts.get(i);
                Account account = new Account(labels.get(i), Helpers.parseBalance(amount),
                        String.format("%s%d", type.getPrefix(), i));
                account.setCurrency(Helpers.parseCurrency(amount, "SEK"));

                boolean isDisposable = account.getName().toLowerCase().contains("disponibelt");
                if (isDisposable) {
                    // the "disponibelt" entry defines the balance and currency of the bank itself
                    account.setType(Account.REGULAR);
                    balance = account.getBalance();
                    setCurrency(account.getCurrency());
                } else {
                    account.setType(Account.OTHER);
                }

                // every entry but the first is an alias of the first one of this account type
                if (i > 0) {
                    account.setAliasfor(String.format("%s%d", type.getPrefix(), 0));
                }
                accounts.add(account);
            }
        }
    } catch (ClientProtocolException protocolFailure) {
        protocolFailure.printStackTrace();
        throw new BankException(protocolFailure.getMessage());
    } catch (IOException ioFailure) {
        ioFailure.printStackTrace();
        throw new BankException(ioFailure.getMessage());
    }

    try {
        RefundSummaryRequest summaryRequest = new RefundSummaryRequest(mUserId, mToken, APPLICATION_ID);
        HttpEntity payload = new StringEntity(getObjectmapper().writeValueAsString(summaryRequest));
        InputStream stream = urlopen
                .openStream("https://www.coop.se/ExternalServices/RefundService.svc/RefundSummary", payload, true);
        RefundSummaryResponse summaryResponse = readJsonValue(stream, RefundSummaryResponse.class);
        if (summaryResponse != null && summaryResponse.getRefundSummaryResult() != null) {
            // NOTE(review): both labels below look like they lost their non-ASCII characters
            // somewhere — confirm the intended text before changing them
            Account refund = new Account("terbring p ditt kort",
                    BigDecimal.valueOf(summaryResponse.getRefundSummaryResult().getAccountBalance()),
                    "refsummary");
            refund.setCurrency("SEK");
            if (accounts.isEmpty()) {
                balance = refund.getBalance();
                setCurrency(refund.getCurrency());
            }
            accounts.add(refund);

            refund = new Account(
                    String.format("terbring fr %s", summaryResponse.getRefundSummaryResult().getMonthName()),
                    BigDecimal.valueOf(summaryResponse.getRefundSummaryResult().getTotalRefund()),
                    "refsummary_month");
            accounts.add(refund);
        }
    } catch (JsonParseException jsonFailure) {
        jsonFailure.printStackTrace();
        throw new BankException(jsonFailure.getMessage());
    } catch (ClientProtocolException protocolFailure) {
        protocolFailure.printStackTrace();
        throw new BankException(protocolFailure.getMessage());
    } catch (IOException ioFailure) {
        ioFailure.printStackTrace();
        throw new BankException(ioFailure.getMessage());
    }

    if (accounts.isEmpty()) {
        throw new BankException(res.getText(R.string.no_accounts_found).toString());
    }
    super.updateComplete();
}

From source file:com.thesmartweb.swebrank.WebParser.java

/**
 * Parse the url and get all the content
 * @param link_html the url to parse/* w  w w.  j  a  va2  s.  com*/
 * @return The content parsed
 */
public String cleanhtml(String link_html) {
    try {
        Document doc = Jsoup.connect(link_html).timeout(10 * 1000).get();
        String title = doc.title();
        String mainbody = doc.body().text();
        Elements links = doc.select("a[href]");
        Elements media = doc.select("[src]");
        //fix link html to remove https:// or http:// and simple /
        if (link_html.substring(link_html.length() - 1, link_html.length()).equalsIgnoreCase("/")) {
            link_html = link_html.substring(0, link_html.length() - 1);
        }
        if (link_html.substring(0, 5).equalsIgnoreCase("https")) {
            link_html = link_html.substring(8);
        } else if (link_html.substring(0, 4).equalsIgnoreCase("http")) {
            link_html = link_html.substring(7);
        }
        String anchortext = "";
        String alttext = "";
        //-----get the anchor text of internal links
        for (Element link : links) {
            String str_check = link.attr("abs:href").toString();
            if (link.attr("abs:href").contains(link_html) && link.text().length() > 1) {
                anchortext = anchortext + link.text() + " ";
            }
        }
        //-------get alt text to internal images links
        for (Element medi : media) {
            if (medi.getElementsByTag("img").attr("src").toString().contains(link_html)) {
                alttext = alttext + " " + medi.getElementsByTag("img").attr("alt").toString();
            }
            if (medi.getElementsByTag("img").attr("src").toString().startsWith("/")) {
                alttext = alttext + " " + medi.getElementsByTag("img").attr("alt").toString();
            }
        }
        String content = mainbody + title + anchortext + alttext;

        return content;
    } catch (IOException ex) {
        Logger.getLogger(com.thesmartweb.swebrank.WebParser.class.getName()).log(Level.SEVERE, null, ex);
        String check = null;
        return check;
    } catch (NullPointerException ex) {
        Logger.getLogger(com.thesmartweb.swebrank.WebParser.class.getName()).log(Level.SEVERE, null, ex);
        String check = null;
        return check;
    } catch (Exception ex) {
        Logger.getLogger(com.thesmartweb.swebrank.WebParser.class.getName()).log(Level.SEVERE, null, ex);
        String check = null;
        return check;
    }

}

From source file:org.bungeni.ext.integration.bungeniportal.BungeniServiceAccess.java

/**
 * Collects the {@code name}/{@code value} attributes of every {@code input} element found
 * inside {@code div#actionsView}.
 *
 * @param doc the document to search
 * @return one pair per input element, in document order; empty if there is none
 */
private List<BasicNameValuePair> getActionsViewButtonInfo(Document doc) {
    List<BasicNameValuePair> pairs = new ArrayList<BasicNameValuePair>(0);
    for (Element input : doc.select("div#actionsView input")) {
        String name = input.attr("name");
        String value = input.attr("value");
        pairs.add(new BasicNameValuePair(name, value));
    }
    return pairs;
}

From source file:de.geeksfactory.opacclient.apis.WebOpacNet.java

/**
 * Builds the list of search fields offered by this OPAC.
 * <p>
 * Text fields come from the options of {@code #drpOptSearchT} on the mobile start page.
 * Filters come from the {@code restrcontainers} array returned by
 * {@code GetRestrictions.ashx}: the container with query type {@code EJ} becomes a text
 * field, every other container a dropdown with a leading "Alle" entry.
 *
 * @return text fields first, then the filter fields, each in the order delivered by the server
 * @throws IOException   if one of the HTTP requests fails
 * @throws JSONException if the restrictions response does not have the expected structure
 */
@Override
public List<SearchField> getSearchFields() throws IOException, JSONException {
    List<SearchField> result = new ArrayList<>();

    // Text fields
    String startPageHtml = httpGet(opac_url + "/de/mobile/default.aspx", getDefaultEncoding());
    Document startPage = Jsoup.parse(startPageHtml);
    for (Element option : startPage.select("#drpOptSearchT option")) {
        TextSearchField textField = new TextSearchField();
        textField.setDisplayName(option.text());
        textField.setId(option.attr("value"));
        textField.setData(new JSONObject("{\"filter\":false}"));
        textField.setHint("");
        result.add(textField);
    }

    // Dropdowns
    String restrictionsJson = httpGet(opac_url + "/de/mobile/GetRestrictions.ashx", getDefaultEncoding());
    JSONArray containers = new JSONObject(restrictionsJson).getJSONArray("restrcontainers");
    for (int i = 0; i < containers.length(); i++) {
        JSONObject container = containers.getJSONObject(i);
        String queryType = container.getString("querytyp");

        if (queryType.equals("EJ")) {
            // Querying by year also works for other years than the ones
            // listed
            // -> Make it a text field instead of a dropdown
            TextSearchField yearField = new TextSearchField();
            yearField.setDisplayName(container.getString("kopf"));
            yearField.setId(queryType);
            yearField.setData(new JSONObject("{\"filter\":true}"));
            yearField.setHint("");
            result.add(yearField);
            continue;
        }

        DropdownSearchField dropdown = new DropdownSearchField();
        dropdown.setId(queryType);
        dropdown.setDisplayName(container.getString("kopf"));

        JSONArray entries = container.getJSONArray("restrictions");

        // the empty id stands for "no restriction"
        dropdown.addDropdownValue("", "Alle");
        for (int j = 0; j < entries.length(); j++) {
            JSONObject entry = entries.getJSONObject(j);
            dropdown.addDropdownValue(entry.getString("id"), entry.getString("bez"));
        }

        dropdown.setData(new JSONObject("{\"filter\":true}"));
        result.add(dropdown);
    }

    return result;
}