Example usage for org.jsoup.nodes Element select

List of usage examples for org.jsoup.nodes Element select

Introduction

On this page you can find example usages of org.jsoup.nodes Element select.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:downloadwolkflow.getWorkFlowList.java

/**
 * Fetches each listing page returned by {@code getPageList()}, extracts the detail link of
 * every {@code div.resource_list_item} entry and hands it to {@code downloadWorkFlow}.
 *
 * <p>I/O failures on a page are logged and the next page is tried. If the thread is
 * interrupted while pausing between downloads, the interrupt flag is restored and the crawl
 * stops.
 *
 * @param args the command line arguments (unused)
 */
public static void main(String args[]) {
    // try-with-resources guarantees the client is closed even if an unchecked exception escapes.
    try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
        String[] pageList = getPageList();
        System.out.println(pageList.length);
        // NOTE(review): iteration starts at index 1, so pageList[0] is never fetched - confirm this is intended.
        for (int i = 1; i < pageList.length; i++) {
            System.out.println(pageList[i]);
            System.out.println("---------------------------------------------------------------------------");
            HttpGet httpget = new HttpGet(pageList[i]);
            try {
                HttpResponse response = httpclient.execute(httpget);
                String page = EntityUtils.toString(response.getEntity());
                Document mainDoc = Jsoup.parse(page);
                Elements resultList = mainDoc.select("div.resource_list_item");
                for (Element workflowResult : resultList) {
                    // first() returns null when nothing matches, so walk the chain defensively.
                    Element mainPanel = workflowResult.select("div.main_panel").first();
                    Element title = mainPanel == null ? null : mainPanel.select("p.title.inline").first();
                    Element detailInfo = title == null ? null : title.select("a").first();
                    if (detailInfo == null) {
                        // Entry without the expected title link: nothing to download.
                        continue;
                    }
                    String detailUrl = "http://www.myexperiment.org" + detailInfo.attr("href") + ".html";
                    System.out.println(detailUrl);
                    downloadWorkFlow(detailUrl, httpclient);
                    // Pause between downloads.
                    Thread.sleep(1000);
                }
            } catch (IOException ex) {
                Logger.getLogger(getWorkFlowList.class.getName()).log(Level.SEVERE, null, ex);
            } catch (InterruptedException ex) {
                // Restore the interrupt status and stop instead of silently carrying on.
                Thread.currentThread().interrupt();
                Logger.getLogger(getWorkFlowList.class.getName()).log(Level.SEVERE, null, ex);
                break;
            }
        }
    } catch (IOException ex) {
        // Only reachable from closing the client; per-page failures are handled inside the loop.
        Logger.getLogger(getWorkFlowList.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:isc_415_practica_1.ISC_415_Practica_1.java

/**
 * Reads a URL from standard input, downloads the page and prints simple statistics about it
 * (line count and the number of {@code p}, {@code img} and {@code form} tags). For every form,
 * the {@code input} elements are listed and a parameter is collected for each one, keyed by its
 * {@code id} or, failing that, its {@code name}. If any parameters were collected they are
 * POSTed to the same URL with the value {@code "PRACTICA1"} and the status line is printed.
 *
 * <p>The literal input {@code servlet} is a shortcut for a fixed local servlet URL; input
 * without an {@code http://} or {@code https://} part gets {@code http://} prepended.
 *
 * @param args the command line arguments
 */
public static void main(String[] args) {
    String urlString;
    Scanner input = new Scanner(System.in);
    Document doc;

    try {
        urlString = input.next();
        if (urlString.equals("servlet")) {
            urlString = "http://localhost:8084/ISC_415_Practica1_Servlet/client";
        }
        urlString = urlString.contains("http://") || urlString.contains("https://") ? urlString
                : "http://" + urlString;
        doc = Jsoup.connect(urlString).get();
    } catch (Exception ex) {
        System.out.println("El URL ingresado no es valido.");
        return;
    }

    String[] plainTextDoc = new TextNode(doc.html(), "").getWholeText().split("\n");
    System.out.println(String.format("Nmero de lineas del documento: %d", plainTextDoc.length));
    System.out.println(String.format("Nmero de p tags: %d", doc.select("p").size()));
    System.out.println(String.format("Nmero de img tags: %d", doc.select("img").size()));
    System.out.println(String.format("Nmero de form tags: %d", doc.select("form").size()));

    int index = 1;

    ArrayList<NameValuePair> urlParameters = new ArrayList<>();
    for (Element e : doc.select("form")) {
        Elements inputs = e.select("input");
        System.out.println(String.format("Form %d: Nmero de Input tags %d", index, inputs.size()));
        System.out.println(inputs);

        for (Element formInput : inputs) {
            // Compare string content, not references: `!= ""` only tests object identity.
            String id = formInput.attr("id");
            String name = formInput.attr("name");
            if (id != null && !id.isEmpty()) {
                urlParameters.add(new BasicNameValuePair(id, "PRACTICA1"));
            } else if (name != null && !name.isEmpty()) {
                urlParameters.add(new BasicNameValuePair(name, "PRACTICA1"));
            }
        }

        index++;
    }

    if (!urlParameters.isEmpty()) {
        // try-with-resources releases the client's connections once the POST is done.
        try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
            UrlEncodedFormEntity entity = new UrlEncodedFormEntity(urlParameters, Consts.UTF_8);
            HttpPost httpPost = new HttpPost(urlString);
            httpPost.setHeader("User-Agent", USER_AGENT);
            httpPost.setEntity(entity);
            HttpResponse response = httpclient.execute(httpPost);
            System.out.println(response.getStatusLine());
        } catch (IOException ex) {
            Logger.getLogger(ISC_415_Practica_1.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
}

From source file:gpxparser.GpxParser.java

/**
 * Parses a fixed GPX file and loads its track name and track points into a {@code Track}.
 *
 * <p>Each {@code trkpt} contributes its {@code lat} and {@code lon} attributes and the text of
 * its {@code ele} child, converted with {@code NumberUtils.toDouble}. An {@link IOException}
 * while reading the file is logged.
 *
 * @param args the command line arguments
 */
public static void main(String[] args) {
    final File gpxFile = new File("/home/yonseca/4.gpx");
    final Track track = new Track();
    try {
        Document parsed = Jsoup.parse(gpxFile, "UTF-8");
        Elements tracks = parsed.getElementsByTag("trk");
        track.setName(tracks.select("name").text());

        for (Element point : tracks.select("trkseg").select("trkpt")) {
            Double latitude = NumberUtils.toDouble(point.attr("lat"));
            Double longitude = NumberUtils.toDouble(point.attr("lon"));
            Double elevation = NumberUtils.toDouble(point.select("ele").text());
            track.addPoint(latitude, longitude, elevation);
        }
        System.out.println("");
    } catch (IOException ex) {
        Logger.getLogger(GpxParser.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:Main.java

/**
 * Extracts the text of the first {@code div[class=card-summary-content]} element, with all
 * {@code sup} descendants removed first.
 *
 * <p>The {@code sup} elements are removed from the document itself, so {@code doc} is modified.
 *
 * @param doc the parsed document to read from; may be {@code null}
 * @return the summary text, or {@code null} when {@code doc} is {@code null} or contains no
 *         matching element
 */
private static String parseCardContent(Document doc) {
    if (doc == null) {
        return null;
    }
    // first() yields null on an empty result, so no exception is needed to detect "not found".
    Element summaryEl = doc.select("div[class=card-summary-content]").first();
    if (summaryEl == null) {
        return null;
    }
    summaryEl.select("sup").remove();
    return summaryEl.text();
}

From source file:Main.java

/**
 * Returns the first element matched by a selector whose attribute has the requested value.
 *
 * @param segment   the element used as the starting context of the query
 * @param name      the CSS query passed to {@code select}
 * @param attrname  the attribute to inspect on each match
 * @param attrvalue the value the attribute must equal
 * @return the first matching element, or {@code null} when none matches
 */
public static Element findFirstElement(Element segment, String name, String attrname, String attrvalue) {
    List<Element> candidates = segment.select(name);
    if (candidates == null) {
        return null;
    }

    for (Element candidate : candidates) {
        String actual = candidate.attr(attrname);
        if (actual == null || !actual.equals(attrvalue)) {
            continue;
        }
        return candidate;
    }
    return null;
}

From source file:com.company.ComponentParser.java

/**
 * Downloads the page at {@code url} and fills a {@code Compo} from the header cells of the
 * second {@code .grid} element on the page.
 *
 * <p>For each {@code th} whose text is one of {@code License}, {@code Categories},
 * {@code HomePage}, {@code Date}, {@code Repository} or {@code Usages}, the text of the next
 * sibling element is copied into the matching property. Header cells without a following
 * sibling are skipped.
 *
 * @param url the address of the page to parse
 * @return the populated component description
 * @throws Exception if the page cannot be read, or if it has fewer than two {@code .grid}
 *                   elements ({@code IndexOutOfBoundsException})
 */
public static Compo Parse(String url) throws Exception {
    String content;
    // Close the network stream as soon as the body has been read.
    try (InputStream inputStream = new URL(url).openStream()) {
        content = IOUtils.toString(inputStream);
    }

    Document document = Jsoup.parse(content);
    Element body = document.body();
    Elements elements = body.select(".grid");
    Compo compo = new Compo();

    // Index 1: the data is read from the second ".grid" match.
    Elements tds = elements.get(1).select("th");

    tds.forEach(element -> {
        Element value = element.nextElementSibling();
        if (value == null) {
            // Header cell with no value cell next to it: nothing to record.
            return;
        }
        switch (element.text()) {
        case "License":
            compo.setLicense(value.text());
            break;
        case "Categories":
            compo.setCategories(value.text());
            break;
        case "HomePage":
            compo.setHomePage(value.select("a").text());
            break;
        case "Date":
            compo.setDate(value.text());
            break;
        case "Repository":
            compo.setRepository(value.text());
            break;
        case "Usages":
            compo.setUsage(value.text());
            break;
        }
    });

    return compo;
}

From source file:org.brnvrn.Main.java

/**
 * Walks every category block of a parsed HTML document and adds its tools to {@code tools}.
 *
 * <p>A category block is a {@code div.row} inside {@code div.container} that contains a table;
 * the category name is the text of its {@code strong} elements. Progress is printed to
 * standard output.
 *
 * @param tools    the list that {@code parseCategory} is given to fill
 * @param doc      the parsed HTML document
 * @param obsolete flag forwarded to {@code parseCategory} and reflected in the progress output
 * @return a new {@code ObjectMapper} with {@code INDENT_OUTPUT} enabled
 */
private static ObjectMapper parseDocument(List<Tool> tools, Document doc, boolean obsolete) {
    // Selector syntax reference: http://jsoup.org/apidocs/org/jsoup/select/Selector.html
    final Elements categoryRows = doc.select("div.container div.row:has(table)");
    final String flavour = obsolete ? "obsolete" : "";
    System.out.println("Parsing " + flavour + " doc.   ###");
    System.out.println(" Found " + categoryRows.size() + " categories.");

    // One iteration per category table.
    for (int idx = 0; idx < categoryRows.size(); idx++) {
        Element categoryRow = categoryRows.get(idx);
        parseCategory(tools, categoryRow, categoryRow.select("strong").text(), obsolete);
    }
    System.out.println(" Got " + tools.size() + " tools.");

    ObjectMapper mapper = new ObjectMapper();
    mapper.enable(SerializationFeature.INDENT_OUTPUT);
    return mapper;
}

From source file:org.brnvrn.Main.java

/**
 * Parse a tr HTML element describing the tool
 * @param tool is to be updated/*from w  w w.  j av a  2  s .  com*/
 * @param tr   brings the data
 * @return true if successful
 */
private static boolean parseTrTool(Tool tool, Element tr) {
    boolean success = true;

    Element nameLink = tr.select("td:eq(0)").first();
    if (nameLink == null)
        return false;
    tool.setName(nameLink.text());
    tool.setUrl(nameLink.getElementsByTag("a").attr("href"));

    tool.setLicense(tr.select("td:eq(2)").first().text());

    tool.setCompatibility(tr.select("td:eq(3)").first().text());

    // More complicated: We will extract and remove known nodes, the rest will be description
    Element tdDescription = tr.select("td:eq(1)").first();
    Elements smalls = tdDescription.getElementsByTag("small");
    for (Element small : smalls) {
        Element author = small.getElementsContainingText("Author").first();
        if (author != null) {
            String authorsString = author.text();
            authorsString = authorsString.substring(authorsString.indexOf(":") + 1);
            tool.addAuthor(authorsString.split(","));
            small.remove();
        }
        Element sourceCode = small.getElementsContainingText("ource").last();
        if (sourceCode != null) {
            tool.setUrl_src(sourceCode.attr("href"));
            small.remove();
        }
    }
    tdDescription.getElementsByTag("br").remove();
    tool.setDescription(Jsoup.clean(tdDescription.html(), Whitelist.relaxed())); // ownText will miss the contained links in the description
    tool.setDescriptionText(tdDescription.text());

    bestEffortThemeLanguage(tool);

    return success;
}

From source file:com.hp.test.framework.htmparse.UpdateTestCaseDesciption.java

/**
 * Reads the HTML file at {@code basepath + path} and stores the text of the second cell of the
 * first qualifying row of a {@code table[id=tableStyle]} in {@code TestCaseDesMap} under
 * {@code path}, unless an entry for {@code path} already exists.
 *
 * <p>If the file cannot be parsed a message is printed and nothing is recorded.
 *
 * @param path file path relative to {@code basepath}; also used as the map key
 */
public static void getTestDescription(String path) {
    Document htmlFile;
    try {
        htmlFile = Jsoup.parse(new File(basepath + path), "UTF-8");
    } catch (IOException e) {
        System.out.println("Exception in parse Current Run html file" + e.getMessage());
        // Without a parsed document there is nothing to scan.
        return;
    }

    for (Element table : htmlFile.select("table[id=tableStyle]")) {
        for (Element row : table.select("tr")) {
            Elements tds = row.select("td");
            if (tds.size() < 2) {
                // Rows without a second cell carry no description.
                continue;
            }
            if (!TestCaseDesMap.containsKey(path)) {
                TestCaseDesMap.put(path, Jsoup.parse(tds.get(1).toString()).text());
            }
            // From here on the key is present, so later rows could never change the map.
            return;
        }
    }
}

From source file:com.hp.test.framework.htmparse.UpdateTestCaseDesciption.java

/**
 * Reads the HTML file at {@code path} and, for every row of each
 * {@code table[id=tableStyle]}, passes the {@code href} of the first link found in the row's
 * first cell to {@code getTestDescription}.
 *
 * <p>If the file cannot be parsed a message is printed and nothing else happens. Rows whose
 * first cell has no link, or whose link has an empty {@code href}, are skipped.
 *
 * @param path path of the HTML file to scan
 */
public static void getTestCaseHtmlPath(String path) {
    Document htmlFile;
    try {
        htmlFile = Jsoup.parse(new File(path), "UTF-8");
    } catch (IOException e) {
        System.out.println("Exception in parse Current Run html file" + e.getMessage());
        // Without a parsed document there is nothing to scan.
        return;
    }

    for (Element table : htmlFile.select("table[id=tableStyle]")) {
        for (Element row : table.select("tr")) {
            Elements tds = row.select("td");
            if (tds.isEmpty()) {
                continue;
            }
            Elements href = tds.get(0).select("a");
            if (href.isEmpty()) {
                continue;
            }
            // Read the attribute directly. text("href") would overwrite the link's text, and
            // recovering the URL by splitting the serialized tag on quotes only works when
            // href happens to be the first attribute.
            String target = href.get(0).attr("href");
            if (!target.isEmpty()) {
                getTestDescription(target);
            }
        }
    }
}