Example usage for org.jsoup.nodes Document select

List of usage examples for org.jsoup.nodes Document select

Introduction

On this page you can find example usages of the org.jsoup.nodes.Document.select method.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:Main.java

/**
 * Parses a small inline HTML snippet, prints the parsed document, and then prints
 * every {@code span} element whose id is {@code my}.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    // The original literal used "<!html>" and class=''contentTitle' (unbalanced quotes);
    // both are corrected here so the parser is fed well-formed markup.
    String html = "<!DOCTYPE html>" + "<html><body>" + "<span id='my'>"
            + "<span class='contentTitle'>Director:</span>" + "<span>test</span></span>" + "</body></html>";

    Document doc = Jsoup.parse(html);
    System.out.println(doc.toString());

    // select() yields an empty Elements list when nothing matches, never null,
    // so neither a null check nor an isEmpty() guard is needed before iterating.
    Elements spanWithId = doc.select("span#my");
    System.out.printf("Found %d Elements\n", spanWithId.size());

    for (Element element : spanWithId) {
        System.out.println(element.toString());
    }
}

From source file:Main.java

/**
 * Fetches http://www.java2s.com with a fixed user-agent header and prints the text
 * of every {@code div} element found in the response.
 *
 * @param args ignored
 * @throws Exception if the page cannot be fetched
 */
public static void main(String[] args) throws Exception {
    final String userAgent =
            "Mozilla/5.0 (Windows; U; WindowsNT 5.1; en-US; rv1.8.1.6) Gecko/20070725 Firefox/2.0.0.6";
    Document page = Jsoup.connect("http://www.java2s.com").userAgent(userAgent).get();

    for (Element div : page.select("div")) {
        System.out.println(div.text());
    }
}

From source file:downloadwolkflow.getWorkFlowList.java

/**
 * Fetches each URL returned by {@code getPageList()}, and for every
 * {@code div.resource_list_item} on the page builds a detail URL from the first title
 * link and passes it to {@code downloadWorkFlow}, pausing one second between downloads.
 *
 * <p>I/O failures on a page are logged and the next page is tried. If the thread is
 * interrupted while pausing, the interrupt flag is restored and the method returns.
 *
 * @param args ignored
 */
public static void main(String args[]) {
    // try-with-resources closes the client on every exit path, including runtime exceptions.
    try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
        String[] pageList = getPageList();
        System.out.println(pageList.length);
        // NOTE(review): iteration starts at index 1, so pageList[0] is never fetched - confirm intended.
        for (int i = 1; i < pageList.length; i++) {
            System.out.println(pageList[i]);
            System.out.println("---------------------------------------------------------------------------");
            HttpGet httpget = new HttpGet(pageList[i]);
            try {
                HttpResponse response = httpclient.execute(httpget);
                String page = EntityUtils.toString(response.getEntity());
                Document mainDoc = Jsoup.parse(page);
                for (Element workflowResult : mainDoc.select("div.resource_list_item")) {
                    // first() returns null when nothing matched; skip items whose markup
                    // does not contain the expected panel/title/link chain instead of crashing.
                    Element panel = workflowResult.select("div.main_panel").first();
                    Element title = panel == null ? null : panel.select("p.title.inline").first();
                    Element detailInfo = title == null ? null : title.select("a").first();
                    if (detailInfo == null) {
                        continue;
                    }
                    String detailUrl = "http://www.myexperiment.org" + detailInfo.attr("href") + ".html";
                    System.out.println(detailUrl);
                    downloadWorkFlow(detailUrl, httpclient);
                    Thread.sleep(1000);
                }
            } catch (IOException ex) {
                Logger.getLogger(getWorkFlowList.class.getName()).log(Level.SEVERE, null, ex);
            } catch (InterruptedException ex) {
                // Restore the interrupt flag and stop: continuing would ignore the cancellation request.
                Thread.currentThread().interrupt();
                Logger.getLogger(getWorkFlowList.class.getName()).log(Level.SEVERE, null, ex);
                return;
            }
        }
    } catch (IOException ex) {
        // Raised only by closing the client.
        Logger.getLogger(getWorkFlowList.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:zz.pseas.ghost.login.taobao.MTaobaoLogin.java

/**
 * Posts the mobile Taobao login form with placeholder credentials, repeating the post
 * until a response carries a {@code Location} header and an empty body, then follows
 * that location and issues API requests built from the {@code _m_h5_tk} cookie.
 *
 * @param args ignored
 * @throws UnsupportedEncodingException if the response body cannot be decoded as utf-8
 * @throws IllegalStateException if a login response has neither a redirect nor a captcha image
 */
public static void main(String[] args) throws UnsupportedEncodingException {

    String tbUserName = "TBname";
    String tbPassword = "TBpasssword";

    GhostClient iPhone = new GhostClient("utf-8");
    String ans = iPhone.get("https://login.m.taobao.com/login.htm");

    // Pull the form target and the hidden fields that must be echoed back on submit.
    Document doc = Jsoup.parse(ans);
    String url = doc.select("form#loginForm").first().attr("action");

    String _tb_token = doc.select("input[name=_tb_token_]").first().attr("value");

    String sid = doc.select("input[name=sid]").first().attr("value");
    System.out.println(_tb_token);
    System.out.println(sid);
    System.out.println(url);

    HashMap<String, String> map = new HashMap<String, String>();
    map.put("TPL_password", tbPassword);
    map.put("TPL_username", tbUserName);
    map.put("_tb_token_", _tb_token);
    map.put("action", "LoginAction");
    map.put("event_submit_do_login", "1");
    map.put("loginFrom", "WAP_TAOBAO");
    map.put("sid", sid);

    String location = null;
    while (true) {
        CommonsPage commonsPage = iPhone.postForPage(url, map);
        location = commonsPage.getHeader("Location");
        String postAns = new String(commonsPage.getContents(), "utf-8");
        if (StringUtil.isNotEmpty(location) && StringUtil.isEmpty(postAns)) {
            break;
        }

        // Parse the response once and reuse it for both the image and the hidden field.
        Document captchaPage = Jsoup.parse(postAns);
        if (captchaPage.select("img.checkcode-img").isEmpty()) {
            // Previously this surfaced as a bare NullPointerException from first().
            throw new IllegalStateException("Login response contained neither a redirect nor a captcha image");
        }
        String s = captchaPage.select("img.checkcode-img").first().attr("src");
        String imgUrl = "https:" + s;

        byte[] bytes = iPhone.getBytes(imgUrl);
        FileUtil.writeFile(bytes, "g:/tbCaptcha.jpg");

        String wapCheckId = captchaPage.select("input[name=wapCheckId]").val();
        // NOTE(review): captcha is never populated, so the form is re-posted with a null
        // check code; presumably the value should be read from the saved image - confirm.
        String captcha = null;
        map.put("TPL_checkcode", captcha);
        map.put("wapCheckId", wapCheckId);
    }

    iPhone.get(location);

    String tk = tokenFromCookie(iPhone.getCookieValue("_m_h5_tk"));

    String url2 = genUrl(tk);
    String ans1 = iPhone.get(url2);
    System.out.println(url2);
    System.out.println(ans1);

    // The cookie is read again after the first request; the original called split()
    // on it unguarded here, which failed when the cookie was absent.
    tk = tokenFromCookie(iPhone.getCookieValue("_m_h5_tk"));
    System.out.println(tk);
    url2 = genUrl(tk);
    iPhone.showCookies();
    RequestConfig requestConfig = RequestConfig.custom().setProxy(new HttpHost("127.0.0.1", 8888)).build();

    HttpUriRequest get = RequestBuilder.get().setConfig(requestConfig)
            //.addHeader("User-Agent","Mozilla/5.0 (iPhone; U; CPU iPhone OS 3_0 like Mac OS X; en-us) AppleWebKit/528.18 (KHTML, like Gecko) Version/4.0 Mobile/7A341 Safari/528.16")
            .addHeader("Host", "api.m.taobao.com").setUri(url2).build();

    ans1 = iPhone.execute(get);

    System.out.println(ans1);

}

/**
 * Returns the part of the cookie value before the first underscore, or the literal
 * {@code "undefined"} when the cookie is missing/empty or that part is empty.
 */
private static String tokenFromCookie(String cookieValue) {
    if (StringUtil.isEmpty(cookieValue)) {
        return "undefined";
    }
    String[] parts = cookieValue.split("_");
    // split() drops trailing empty strings, so a value made only of underscores yields no parts.
    if (parts.length == 0 || StringUtil.isEmpty(parts[0])) {
        return "undefined";
    }
    return parts[0];
}

From source file:com.alexoree.jenkins.Main.java

/**
 * Downloads every {@code .hpi}/{@code .war} link listed at
 * {@code https://updates.jenkins-ci.org/latest/} and every {@code .json} link listed at
 * {@code https://updates.jenkins-ci.org/}, adding each downloaded file to
 * {@code jenkinsSync.zip} in the working directory.
 *
 * @param args command line; {@code -t} sleeps for {@code WAIT} between downloads
 * @throws Exception if option parsing, fetching, or file/zip I/O fails
 */
public static void main(String[] args) throws Exception {
    Options options = new Options();
    options.addOption("t", false, "throttle the downloads, waits 5 seconds in between each d/l");

    // The usage text is printed on every run, as before.
    HelpFormatter formatter = new HelpFormatter();
    formatter.printHelp("jenkins-sync", options);

    CommandLineParser parser = new DefaultParser();
    CommandLine cmd = parser.parse(options, args);
    boolean throttle = cmd.hasOption("t");

    String plugins = "https://updates.jenkins-ci.org/latest/";
    List<String> ps = new ArrayList<String>();
    Document doc = Jsoup.connect(plugins).get();
    for (Element file : doc.select("td a")) {
        String href = file.attr("href");
        if (href.endsWith(".hpi") || href.endsWith(".war")) {
            ps.add(href);
        }
    }

    File root = new File(".");
    new File("./latest").mkdirs();

    String zipFile = "jenkinsSync.zip";
    byte[] buffer = new byte[1024];

    // try-with-resources guarantees the archive is finished and closed even when a
    // download or copy fails part-way through; the original leaked both streams then.
    try (FileOutputStream fos = new FileOutputStream(zipFile);
            ZipOutputStream zos = new ZipOutputStream(fos)) {

        // download the plugins
        for (int i = 0; i < ps.size(); i++) {
            System.out.println("[" + i + "/" + ps.size() + "] downloading " + plugins + ps.get(i));
            String localPath = root.getAbsolutePath() + "/latest/" + ps.get(i);
            String outputFile = download(localPath, plugins + ps.get(i));

            String entryName = outputFile.replace(root.getAbsolutePath(), "")
                    .replace("updates.jenkins-ci.org/", "").replace("https:/", "");
            addToZip(zos, outputFile, entryName, buffer);
            if (throttle) {
                Thread.sleep(WAIT);
            }
            new File(localPath).deleteOnExit();
        }

        // download the json metadata
        plugins = "https://updates.jenkins-ci.org/";
        ps = new ArrayList<String>();
        doc = Jsoup.connect(plugins).get();
        for (Element file : doc.select("td a")) {
            String href = file.attr("href");
            if (href.endsWith(".json")) {
                ps.add(href);
            }
        }
        for (int i = 0; i < ps.size(); i++) {
            String localPath = root.getAbsolutePath() + "/" + ps.get(i);
            download(localPath, plugins + ps.get(i));

            // NOTE(review): the entry name here is the full URL, unlike the plugin entries
            // above, which strip the scheme and host - kept as-is; confirm which is intended.
            addToZip(zos, localPath, plugins + ps.get(i), buffer);
            new File(localPath).deleteOnExit();
            if (throttle) {
                Thread.sleep(WAIT);
            }
        }
    }
}

/** Copies the file at {@code sourcePath} into {@code zos} as a new entry named {@code entryName}. */
private static void addToZip(ZipOutputStream zos, String sourcePath, String entryName, byte[] buffer)
        throws java.io.IOException {
    try (FileInputStream fis = new FileInputStream(sourcePath)) {
        zos.putNextEntry(new ZipEntry(entryName));
        int length;
        while ((length = fis.read(buffer)) > 0) {
            zos.write(buffer, 0, length);
        }
        zos.closeEntry();
    }
}

From source file:com.atlbike.etl.service.ClientFormLogin.java

/**
 * @param args/*from w  w w.j  a v a2 s  .c  o  m*/
 * @throws Exception
 */
public static void main(String[] args) throws Exception {
    String authenticityToken = "";
    BasicCookieStore cookieStore = new BasicCookieStore();
    CloseableHttpClient httpclient = HttpClients.custom().setDefaultCookieStore(cookieStore)
            .setRedirectStrategy(new LaxRedirectStrategy()).build();
    try {
        HttpGet httpget = new HttpGet("https://atlbike.nationbuilder.com/login");
        CloseableHttpResponse response1 = httpclient.execute(httpget);
        try {
            HttpEntity entity = response1.getEntity();
            System.out.println("Content Length: " + entity.getContentLength());

            System.out.println("Login form get: " + response1.getStatusLine());
            // EntityUtils.consume(entity);
            String content = EntityUtils.toString(entity);
            Document doc = Jsoup.parse(content);
            Elements metaElements = doc.select("META");
            for (Element elem : metaElements) {
                System.out.println(elem);
                if (elem.hasAttr("name") && "csrf-token".equals(elem.attr("name"))) {
                    System.out.println("Value: " + elem.attr("content"));
                    authenticityToken = elem.attr("content");
                }
            }

            System.out.println("Initial set of cookies:");
            List<Cookie> cookies = cookieStore.getCookies();
            if (cookies.isEmpty()) {
                System.out.println("None");
            } else {
                for (int i = 0; i < cookies.size(); i++) {
                    System.out.println("- " + cookies.get(i).toString());
                }
            }
        } finally {
            response1.close();
        }

        HttpUriRequest login = RequestBuilder.post()
                .setUri(new URI("https://atlbike.nationbuilder.com/forms/user_sessions"))
                .addParameter("email_address", "").addParameter("user_session[email]", "email@domain")
                .addParameter("user_session[password]", "magicCookie")
                .addParameter("user_session[remember_me]", "1").addParameter("commit", "Sign in with email")
                .addParameter("authenticity_token", authenticityToken).build();
        CloseableHttpResponse response2 = httpclient.execute(login);
        try {
            HttpEntity entity = response2.getEntity();
            // for (Header h : response2.getAllHeaders()) {
            // System.out.println(h);
            // }
            System.out.println("Content Length: " + entity.getContentLength());

            System.out.println("Login form get: " + response2.getStatusLine());
            EntityUtils.consume(entity);

            System.out.println("Post logon cookies:");
            List<Cookie> cookies = cookieStore.getCookies();
            if (cookies.isEmpty()) {
                System.out.println("None");
            } else {
                for (int i = 0; i < cookies.size(); i++) {
                    System.out.println("- " + cookies.get(i).toString());
                }
            }
        } finally {
            response2.close();
        }

        httpget = new HttpGet(
                // HttpUriRequest file = RequestBuilder
                // .post()
                // .setUri(new URI(
                "https://atlbike.nationbuilder.com/admin/membership_types/14/download");
        // .build();
        // CloseableHttpResponse response3 = httpclient.execute(file);
        CloseableHttpResponse response3 = httpclient.execute(httpget);
        try {
            HttpEntity entity = response3.getEntity();
            System.out.println("Content Length: " + entity.getContentLength());

            System.out.println("File Get: " + response3.getStatusLine());
            saveEntity(entity);
            // EntityUtils.consume(entity);

            System.out.println("Post file get cookies:");
            List<Cookie> cookies = cookieStore.getCookies();
            if (cookies.isEmpty()) {
                System.out.println("None");
            } else {
                for (int i = 0; i < cookies.size(); i++) {
                    System.out.println("- " + cookies.get(i).toString());
                }
            }
        } finally {
            response3.close();
        }

    } finally {
        httpclient.close();
    }
}

From source file:module.entities.NameFinder.RegexNameFinder.java

/**
 * Extracts a signer name and role from the body of every consultation and every
 * completed consultation returned by {@code DB}, stores the results through
 * {@code DB.insertDemocracitConsultationMinister}, and records start/end in the
 * regex-finder log.
 *
 * <p>Fixes relative to the earlier version: results for completed consultations are
 * now collected into their own maps (previously they were written into the maps of
 * the first pass, so the map passed for completed names was always empty), and the
 * "finished at" message prints the end time rather than the start time.
 *
 * @param args the command line arguments; when exactly one is given it is assigned to
 *             {@code Config.configFile}
 * @throws SQLException declared by the method; presumably raised by the DB calls
 * @throws IOException declared by the method
 */
public static void main(String[] args) throws SQLException, IOException {

    if (args.length == 1) {
        Config.configFile = args[0];
    }
    long lStartTime = System.currentTimeMillis();
    Timestamp startTime = new Timestamp(lStartTime);
    System.out.println("Regex Name Finder process started at: " + startTime);
    DB.initPostgres();
    regexerId = DB.LogRegexFinder(lStartTime);
    initLexicons();
    JSONObject obj = new JSONObject();

    // Pass 1: open consultations.
    TreeMap<Integer, String> consultations = DB.getDemocracitConsultationBody();
    TreeMap<Integer, String> consFoundNames = new TreeMap<>();
    TreeMap<Integer, String> consFoundRoles = new TreeMap<>();
    collectSignatures(consultations, consFoundNames, consFoundRoles, "--");
    DB.insertDemocracitConsultationMinister(consFoundNames, consFoundRoles);

    // Pass 2: completed consultations, kept in separate maps.
    TreeMap<Integer, String> consultationsCompletedText = DB.getDemocracitCompletedConsultationBody();
    TreeMap<Integer, String> complConsFoundNames = new TreeMap<>();
    TreeMap<Integer, String> complConsFoundRoles = new TreeMap<>();
    collectSignatures(consultationsCompletedText, complConsFoundNames, complConsFoundRoles, "++");
    DB.insertDemocracitConsultationMinister(complConsFoundNames, complConsFoundRoles);

    long lEndTime = System.currentTimeMillis();
    System.out.println("Regex Name Finder process finished at: " + new Timestamp(lEndTime));
    obj.put("message", "Regex Name Finder finished with no errors");
    obj.put("details", "");
    DB.UpdateLogRegexFinder(lEndTime, regexerId, obj);
    DB.close();
}

/**
 * For each consultation body, finds its signature and, when the signature contains
 * {@code #}, records the trimmed text before the first {@code #} as the name and the
 * following segment (if any) as the role. Ids without such a signature are written to
 * standard error prefixed with {@code missPrefix}.
 */
private static void collectSignatures(TreeMap<Integer, String> bodies, TreeMap<Integer, String> names,
        TreeMap<Integer, String> roles, String missPrefix) throws SQLException, IOException {
    for (java.util.Map.Entry<Integer, String> consultation : bodies.entrySet()) {
        int consId = consultation.getKey();
        String signature = getSignatureFromParagraphs(splitParagraphsOnBreaks(consultation.getValue()));
        if (!signature.contains("#")) {
            System.err.println(missPrefix + consId);
            continue;
        }
        String[] signTokens = signature.split("#");
        // split() drops trailing empty strings, so a signature of only "#" yields no tokens.
        String signName = signTokens.length > 0 ? signTokens[0] : "";
        String roleName = signTokens.length > 1 ? signTokens[1] : "";
        names.put(consId, signName.trim());
        roles.put(consId, roleName.trim());
    }
}

/**
 * Returns the {@code p} elements of the given HTML, with any paragraph whose inner
 * HTML contains {@code <br>} re-parsed so that each break-separated part becomes its
 * own paragraph.
 */
private static Elements splitParagraphsOnBreaks(String html) {
    Elements allPars = new Elements();
    for (Element par : Jsoup.parse(html).select("p")) {
        String inner = par.html();
        if (inner.contains("<br>")) {
            String out = "<p>" + inner.replace("<br>", "</p><p>") + "</p>";
            allPars.addAll(Jsoup.parse(out).select("p"));
        } else {
            allPars.add(par);
        }
    }
    return allPars;
}

From source file:isc_415_practica_1.ISC_415_Practica_1.java

/**
 * Reads a URL from standard input, fetches it, prints counts of lines and of
 * {@code p}, {@code img} and {@code form} tags, lists the inputs of each form, and,
 * if any input has an id or name, posts all of them back to the URL with the value
 * {@code PRACTICA1}, printing the response status line.
 *
 * <p>The word {@code servlet} is accepted as a shortcut for a fixed localhost URL, and
 * {@code http://} is prepended when the input does not start with an http(s) scheme.
 *
 * @param args the command line arguments (unused)
 */
public static void main(String[] args) {
    String urlString;
    Scanner input = new Scanner(System.in);
    Document doc;

    try {
        urlString = input.next();
        if (urlString.equals("servlet")) {
            urlString = "http://localhost:8084/ISC_415_Practica1_Servlet/client";
        }
        // startsWith, not contains: a scheme appearing later in the string (for example
        // inside a query parameter) must not suppress the prefix.
        if (!urlString.startsWith("http://") && !urlString.startsWith("https://")) {
            urlString = "http://" + urlString;
        }
        doc = Jsoup.connect(urlString).get();
    } catch (Exception ex) {
        System.out.println("El URL ingresado no es valido.");
        return;
    }

    String[] plainTextDoc = doc.html().split("\n");
    // "N\u00famero" restores the accented character that was missing from these messages.
    System.out.println(String.format("N\u00famero de lineas del documento: %d", plainTextDoc.length));
    System.out.println(String.format("N\u00famero de p tags: %d", doc.select("p").size()));
    System.out.println(String.format("N\u00famero de img tags: %d", doc.select("img").size()));
    System.out.println(String.format("N\u00famero de form tags: %d", doc.select("form").size()));

    int index = 1;

    ArrayList<NameValuePair> urlParameters = new ArrayList<>();
    for (Element e : doc.select("form")) {
        System.out.println(
                String.format("Form %d: N\u00famero de Input tags %d", index, e.select("input").size()));
        System.out.println(e.select("input"));

        for (Element formInput : e.select("input")) {
            // Compare string contents with isEmpty(); the original used != "", which
            // compares references. The id is preferred, with the name as fallback.
            String id = formInput.attr("id");
            String name = formInput.attr("name");
            if (id != null && !id.isEmpty()) {
                urlParameters.add(new BasicNameValuePair(id, "PRACTICA1"));
            } else if (name != null && !name.isEmpty()) {
                urlParameters.add(new BasicNameValuePair(name, "PRACTICA1"));
            }
        }

        index++;
    }

    if (!urlParameters.isEmpty()) {
        // try-with-resources releases the client's connections once the status is printed.
        try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
            UrlEncodedFormEntity entity = new UrlEncodedFormEntity(urlParameters, Consts.UTF_8);
            HttpPost httpPost = new HttpPost(urlString);
            httpPost.setHeader("User-Agent", USER_AGENT);
            httpPost.setEntity(entity);
            HttpResponse response = httpclient.execute(httpPost);
            System.out.println(response.getStatusLine());
        } catch (IOException ex) {
            Logger.getLogger(ISC_415_Practica_1.class.getName()).log(Level.SEVERE, null, ex);
        }

    }

}

From source file:Main.java

/**
 * Fetches the page at {@code url} and returns the text of the first element matching
 * the CSS query {@code tag}.
 *
 * @param url address of the page to fetch
 * @param tag CSS selector identifying the element to read
 * @return the matched element's text, or an empty string when nothing matches
 *         (previously a {@code NullPointerException} was thrown in that case)
 * @throws IOException if the page cannot be fetched
 */
public static String getPrivacyNotice(String url, String tag) throws IOException {
    Document doc = Jsoup.connect(url).get();
    // eq(0) narrows the result to the first match (or to an empty list), and text()
    // on an empty list is "", so no null check on first() is required.
    return doc.select(tag).eq(0).text();
}

From source file:Main.java

/**
 * Parses {@code html} and prints the {@code href} attribute and the text of the first
 * {@code a} element, each on its own line. Prints nothing when the markup contains no
 * anchor (previously that case failed with a {@code NullPointerException}).
 *
 * @param html markup to parse
 */
public static void parseHtml(String html) {
    Document document = Jsoup.parse(html);
    Element linkElement = document.select("a").first();
    if (linkElement == null) {
        // first() returns null when the selection is empty.
        return;
    }
    String linkHref = linkElement.attr("href"); // e.g. "http://sample.com"
    String linkText = linkElement.text(); // e.g. "This is sample"
    System.out.println(linkHref);
    System.out.println(linkText);
}