List of usage examples for org.jsoup.nodes Element select
public Elements select(String cssQuery)
From source file:downloadwolkflow.getWorkFlowList.java
/**
 * Fetches every listing page returned by {@code getPageList()}, extracts the detail link of
 * each {@code div.resource_list_item} entry and passes it to {@code downloadWorkFlow}.
 *
 * <p>A failure on one page is logged and the next page is tried. An interrupt stops the
 * whole run.
 *
 * @param args command line arguments (unused)
 */
public static void main(String args[]) {
    String[] pageList = getPageList();
    System.out.println(pageList.length);
    // try-with-resources closes the client on every exit path, including unexpected
    // runtime exceptions and the early return on interruption.
    try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
        // NOTE(review): iteration starts at index 1, so pageList[0] is never fetched.
        // Kept as in the original; confirm this is intended.
        for (int i = 1; i < pageList.length; i++) {
            System.out.println(pageList[i]);
            System.out.println("---------------------------------------------------------------------------");
            HttpGet httpget = new HttpGet(pageList[i]);
            try {
                HttpResponse response = httpclient.execute(httpget);
                String page = EntityUtils.toString(response.getEntity());
                Document mainDoc = Jsoup.parse(page);
                Elements resultList = mainDoc.select("div.resource_list_item");
                for (Element workflowResult : resultList) {
                    // Elements.first() returns null when nothing matched, so walk the chain
                    // step by step instead of dereferencing blindly.
                    Element mainPanel = workflowResult.select("div.main_panel").first();
                    Element title = mainPanel == null ? null : mainPanel.select("p.title.inline").first();
                    Element detailInfo = title == null ? null : title.select("a").first();
                    if (detailInfo == null) {
                        // Entry without a title link: nothing to download.
                        continue;
                    }
                    String detailUrl = "http://www.myexperiment.org" + detailInfo.attributes().get("href") + ".html";
                    System.out.println(detailUrl);
                    downloadWorkFlow(detailUrl, httpclient);
                    // Pause between downloads.
                    Thread.sleep(1000);
                }
            } catch (IOException ex) {
                Logger.getLogger(getWorkFlowList.class.getName()).log(Level.SEVERE, null, ex);
            } catch (InterruptedException ex) {
                Logger.getLogger(getWorkFlowList.class.getName()).log(Level.SEVERE, null, ex);
                // Restore the interrupt flag and stop instead of carrying on with more pages.
                Thread.currentThread().interrupt();
                return;
            }
        }
    } catch (IOException ex) {
        // Only closing the client can end up here; per-page failures are handled above.
        Logger.getLogger(getWorkFlowList.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:isc_415_practica_1.ISC_415_Practica_1.java
/**
 * Reads a URL from standard input, downloads the page with jsoup, prints a few statistics
 * (line count, number of {@code p}, {@code img} and {@code form} tags, and the inputs of each
 * form) and finally POSTs one parameter per form input back to the same URL.
 *
 * <p>Each parameter is named after the input's {@code id}, or its {@code name} when there is
 * no id, and carries the fixed value {@code "PRACTICA1"}.
 *
 * @param args the command line arguments
 */
public static void main(String[] args) {
    String urlString;
    Scanner input = new Scanner(System.in);
    Document doc;
    try {
        urlString = input.next();
        if (urlString.equals("servlet")) {
            urlString = "http://localhost:8084/ISC_415_Practica1_Servlet/client";
        }
        // Test the scheme at the START of the input: a URL that merely contains "http://"
        // somewhere (e.g. inside a query parameter) still needs the prefix.
        boolean hasScheme = urlString.startsWith("http://") || urlString.startsWith("https://");
        if (!hasScheme) {
            urlString = "http://" + urlString;
        }
        doc = Jsoup.connect(urlString).get();
    } catch (Exception ex) {
        System.out.println("El URL ingresado no es valido.");
        return;
    }

    String[] plainTextDoc = doc.html().split("\n");
    System.out.println(String.format("Nmero de lineas del documento: %d", plainTextDoc.length));
    System.out.println(String.format("Nmero de p tags: %d", doc.select("p").size()));
    System.out.println(String.format("Nmero de img tags: %d", doc.select("img").size()));
    Elements forms = doc.select("form");
    System.out.println(String.format("Nmero de form tags: %d", forms.size()));

    int index = 1;
    ArrayList<NameValuePair> urlParameters = new ArrayList<>();
    for (Element form : forms) {
        Elements inputs = form.select("input");
        System.out.println(String.format("Form %d: Nmero de Input tags %d", index, inputs.size()));
        System.out.println(inputs);
        for (Element formInput : inputs) {
            // attr() yields an empty string for a missing attribute; compare by content,
            // never with == / != on String references.
            String id = formInput.attr("id");
            String name = formInput.attr("name");
            if (!id.isEmpty()) {
                urlParameters.add(new BasicNameValuePair(id, "PRACTICA1"));
            } else if (!name.isEmpty()) {
                urlParameters.add(new BasicNameValuePair(name, "PRACTICA1"));
            }
        }
        index++;
    }

    if (!urlParameters.isEmpty()) {
        // try-with-resources releases the client's connections once the POST is done.
        try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
            UrlEncodedFormEntity entity = new UrlEncodedFormEntity(urlParameters, Consts.UTF_8);
            HttpPost httpPost = new HttpPost(urlString);
            httpPost.setHeader("User-Agent", USER_AGENT);
            httpPost.setEntity(entity);
            HttpResponse response = httpclient.execute(httpPost);
            System.out.println(response.getStatusLine());
        } catch (IOException ex) {
            Logger.getLogger(ISC_415_Practica_1.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
}
From source file:gpxparser.GpxParser.java
/** * @param args the command line arguments *///from www. j a va 2 s .c o m public static void main(String[] args) { File input = new File("/home/yonseca/4.gpx"); Track track = new Track(); try { Document doc = Jsoup.parse(input, "UTF-8"); //System.out.println(doc.text()); Elements trackData = doc.getElementsByTag("trk"); Elements trackName = trackData.select("name"); track.setName(trackName.text()); Elements trkPt = trackData.select("trkseg").select("trkpt"); for (Iterator<Element> iterator = trkPt.iterator(); iterator.hasNext();) { Element dataPoint = iterator.next(); Double lat = NumberUtils.toDouble(dataPoint.attr("lat")); Double lon = NumberUtils.toDouble(dataPoint.attr("lon")); Double altitude = NumberUtils.toDouble(dataPoint.select("ele").text()); track.addPoint(lat, lon, altitude); } System.out.println(""); } catch (IOException ex) { Logger.getLogger(GpxParser.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:Main.java
private static String parseCardContent(Document doc) { try {//from www .ja va2s. com Element summaryEl = doc.select("div[class=card-summary-content]").get(0); summaryEl.select("sup").remove(); return summaryEl.text(); } catch (Exception e) { e.printStackTrace(); } return null; }
From source file:Main.java
/**
 * Returns the first element under {@code segment} that matches the selector {@code name} and
 * whose attribute {@code attrname} equals {@code attrvalue}.
 *
 * @param segment   element to search in
 * @param name      CSS query passed to {@code select}
 * @param attrname  attribute to inspect on each match
 * @param attrvalue required attribute value
 * @return the first matching element, or {@code null} if there is none
 */
public static Element findFirstElement(Element segment, String name, String attrname, String attrvalue) {
    List<Element> candidates = segment.select(name);
    if (candidates == null) {
        return null;
    }
    for (int idx = 0; idx < candidates.size(); idx++) {
        Element candidate = candidates.get(idx);
        String actual = candidate.attr(attrname);
        if (actual == null || !actual.equals(attrvalue)) {
            continue;
        }
        return candidate;
    }
    return null;
}
From source file:com.company.ComponentParser.java
public static Compo Parse(String url) throws Exception { InputStream inputStream = new URL(url).openStream(); String content = IOUtils.toString(inputStream); Document document = Jsoup.parse(content); Element body = document.body(); Elements elements = body.select(".grid"); Compo compo = new Compo(); Elements tds = elements.get(1).select("th"); //System.out.println(content); tds.forEach(element -> {/* w w w.j a va 2 s .c o m*/ // System.out.println(element.text()); switch (element.text()) { case "License": compo.setLicense(element.nextElementSibling().text()); break; case "Categories": compo.setCategories(element.nextElementSibling().text()); break; case "HomePage": compo.setHomePage(element.nextElementSibling().select("a").text()); break; case "Date": compo.setDate(element.nextElementSibling().text()); break; case "Repository": compo.setRepository(element.nextElementSibling().text()); break; case "Usages": compo.setUsage(element.nextElementSibling().text()); break; } }); return compo; }
From source file:org.brnvrn.Main.java
/** * Parse a HTML document, add tools to the list */// ww w. ja v a2s .co m private static ObjectMapper parseDocument(List<Tool> tools, Document doc, boolean obsolete) { // http://jsoup.org/apidocs/org/jsoup/select/Selector.html Elements category_div = doc.select("div.container div.row:has(table)"); // we loop over each category table System.out.println("Parsing " + (obsolete ? "obsolete" : "") + " doc. ###"); System.out.println(" Found " + category_div.size() + " categories."); for (Element tool_div : category_div) { String category = tool_div.select("strong").text(); parseCategory(tools, tool_div, category, obsolete); } System.out.println(" Got " + tools.size() + " tools."); ObjectMapper objectMapper = new ObjectMapper(); objectMapper.enable(SerializationFeature.INDENT_OUTPUT); return objectMapper; }
From source file:org.brnvrn.Main.java
/** * Parse a tr HTML element describing the tool * @param tool is to be updated/*from w w w. j av a 2 s . com*/ * @param tr brings the data * @return true if successful */ private static boolean parseTrTool(Tool tool, Element tr) { boolean success = true; Element nameLink = tr.select("td:eq(0)").first(); if (nameLink == null) return false; tool.setName(nameLink.text()); tool.setUrl(nameLink.getElementsByTag("a").attr("href")); tool.setLicense(tr.select("td:eq(2)").first().text()); tool.setCompatibility(tr.select("td:eq(3)").first().text()); // More complicated: We will extract and remove known nodes, the rest will be description Element tdDescription = tr.select("td:eq(1)").first(); Elements smalls = tdDescription.getElementsByTag("small"); for (Element small : smalls) { Element author = small.getElementsContainingText("Author").first(); if (author != null) { String authorsString = author.text(); authorsString = authorsString.substring(authorsString.indexOf(":") + 1); tool.addAuthor(authorsString.split(",")); small.remove(); } Element sourceCode = small.getElementsContainingText("ource").last(); if (sourceCode != null) { tool.setUrl_src(sourceCode.attr("href")); small.remove(); } } tdDescription.getElementsByTag("br").remove(); tool.setDescription(Jsoup.clean(tdDescription.html(), Whitelist.relaxed())); // ownText will miss the contained links in the description tool.setDescriptionText(tdDescription.text()); bestEffortThemeLanguage(tool); return success; }
From source file:com.hp.test.framework.htmparse.UpdateTestCaseDesciption.java
public static void getTestDescription(String path) { Document htmlFile = null;// w w w . ja va2 s .co m try { htmlFile = Jsoup.parse(new File(basepath + path), "UTF-8"); } catch (IOException e) { System.out.println("Exception in parse Current Run html file" + e.getMessage()); } for (Element table : htmlFile.select("table[id=tableStyle]")) { Elements row1 = table.select("tr"); for (int j = 0; j < row1.size(); j++) { Element tds1 = row1.get(j); Elements tds = tds1.select("td"); for (int i = 0; i < tds.size(); i++) { Element link = tds.get(i); String link_temp = link.toString(); if (i == 1) { // System.out.println("data" + link_temp); if (!TestCaseDesMap.containsKey(path)) { TestCaseDesMap.put(path, Jsoup.parse(link_temp).text()); } break; } } } } }
From source file:com.hp.test.framework.htmparse.UpdateTestCaseDesciption.java
public static void getTestCaseHtmlPath(String path) { Document htmlFile = null;//from ww w .j a va 2s .co m try { htmlFile = Jsoup.parse(new File(path), "UTF-8"); } catch (IOException e) { System.out.println("Exception in parse Current Run html file" + e.getMessage()); } for (Element table : htmlFile.select("table[id=tableStyle]")) { Elements row1 = table.select("tr"); for (int j = 0; j < row1.size(); j++) { Element tds1 = row1.get(j); Elements tds = tds1.select("td"); for (int i = 0; i < tds.size(); i++) { Element link = tds.get(i); Elements href = link.select("a"); if (i == 0) { if (href.size() > 0) { String[] temp_ar = href.get(0).text("href").toString().split("\""); getTestDescription(temp_ar[1]); break; } } } } } }