List of usage examples for org.jsoup.select.Elements.text()
public String text()
From source file:GIST.IzbirkomExtractor.TableExtractor.java
/**
 * Cleans up leftover HTML code from the cell content.
 *
 * @param cell_content HTML element of the table cell
 * @return a list containing the text of each non-blank line of the cell content with all
 *         HTML markup removed
 */
private ArrayList<String> cleanLeftoverHTML(Element cell_content) {
    ArrayList<String> streets_and_numbers = new ArrayList<String>();

    /* <div>s designate separate lines inside the table cell */
    for (Element addr_line : cell_content.getElementsByTag("div")) {

        /* skip empty address lines */
        String addr_line_text = cleanupUNICODE(addr_line.text());
        if (StringUtils.isBlank(addr_line_text)) {
            continue;
        }

        /* <strong> is not particularly useful, but can designate placement of simple separators like space */
        Elements streets = addr_line.getElementsByTag("strong");
        if (!streets.isEmpty()) {
            String street_text = streets.text();

            /*
             * Pad the first literal occurrence of the street text with spaces. This is done with
             * indexOf/substring rather than replaceFirst so that '$' or '\' characters in the
             * street text are not interpreted as regex replacement syntax.
             */
            int pos = addr_line_text.indexOf(street_text);
            if (pos >= 0) {
                addr_line_text = addr_line_text.substring(0, pos)
                        + " " + street_text + " "
                        + addr_line_text.substring(pos + street_text.length());
            }
        }

        streets_and_numbers.add(addr_line_text);
    }

    return streets_and_numbers;
}
From source file:com.FFLive.Player.java
public void getPlayer() { try {//from ww w . j av a 2 s .c om if (playerID.equals("-1")) { //Average Team Player... Document doc = Jsoup.connect("http://fantasy.premierleague.com/entry/1/event-history/" + GW + "/") .get(); Elements averageScore = doc.select("div.ismUnit.ismSize2of5.ismLastUnit") .select("div.ismSBSecondaryVal"); if (averageScore.isEmpty()) { playerScore = 0; } else { try { playerScore = Integer.parseInt(averageScore.text().replaceAll("\\D+", "")); } catch (NumberFormatException n) { Main.log.log(2, "Issue saving Average Team..." + n + "\n"); } } playerName = "Average"; } else { Main.log.log(7, "Fetching Player " + playerID + "\n"); //Connects to the players info page InputStream playerJson = new URL( "http://fantasy.premierleague.com/web/api/elements/" + playerID + "/").openStream(); //Reads the data into a JSON object (via casting into a regular object) Reader reader = new InputStreamReader(playerJson, "UTF-8"); JSONObject playerValues = (JSONObject) JSONValue.parse(reader); //TODO Check if there are values overlength //Max Length Ref playerCount INT DEFAULT 1 NOT NULL, firstName VARCHAR(40), lastName VARCHAR(40), //webName VARCHAR(50), score INT, gameweekBreakdown VARCHAR(250), breakdown VARCHAR(250), //teamName VARCHAR(40), currentFixture VARCHAR(40), nextFixture VARCHAR(40), status VARCHAR(10), //news VARCHAR(250), photo VARCHAR(30)) //Adds Required Data firstName = playerValues.get("first_name").toString(); lastName = playerValues.get("second_name").toString(); playerName = playerValues.get("web_name").toString(); playerTeam = playerValues.get("team_name").toString(); teamNumber = Integer.parseInt(playerValues.get("team_id").toString()); position = playerValues.get("type_name").toString(); /* JSONObject test = (JSONObject)JSONValue.parse(playerValues.get("fixture_history").toString()); String summary = test.get("summary").toString(); String all = test.get("all").toString(); */ playerScore = 
Integer.parseInt(playerValues.get("event_total").toString()); gameweekBreakdown = playerValues.get("event_explain").toString(); //scoreBreakdown = playerValues.get("fixture_history").toString(); currentFixture = playerValues.get("current_fixture").toString(); nextFixture = playerValues.get("next_fixture").toString(); status = playerValues.get("status").toString(); news = playerValues.get("news").toString(); photo = playerValues.get("photo").toString(); /* System.out.println(firstName); System.out.println(lastName); System.out.println(playerName); System.out.println(playerTeam); System.out.println(position); System.out.println(summary); System.out.println(all); System.out.println(playerScore); System.out.println(scoreBreakdown); System.out.println(currentFixture); System.out.println(nextFixture); System.out.println(status); System.out.println(news); System.out.println(photo);*/ } } catch (ConnectException c) { if (timeoutCheck() > 3) { Main.log.log(2, "Too Many Timeouts.. Skipping\n"); } Main.log.log(6, "Timeout Connecting, Retrying...\n"); getPlayer(); } catch (SocketTimeoutException e) { if (timeoutCheck() > 3) { Main.log.log(2, "Too Many Timeouts.. Skipping\n"); } Main.log.log(6, "Timeout Connecting, Retrying...\n"); getPlayer(); } catch (UnknownHostException g) { Main.log.log(6, "No Connection... Skipping\n"); } catch (NoRouteToHostException h) { Main.log.log(6, "No Connection... Skipping\n"); } catch (IOException f) { Main.log.log(6, "In getPlayer: " + f + "\n"); } catch (NullPointerException n) { Main.log.log(2, "Missing Player Field with ID:" + playerID + " " + n + "\n"); Main.log.log(9, n); } }
From source file:com.elevenpaths.googleindexretriever.GoogleSearch.java
/**
 * Runs the current query and returns the text of the single result snippet.
 *
 * @return the combined text of the elements matching {@code .st} (empty when nothing matches)
 * @throws EmptyQueryException if the query string is empty
 * @throws ManyResultsException if more than one {@code .st} element is found
 */
public String getResults()
        throws EmptyQueryException, ManyResultsException, CaptchaException, UnsupportedEncodingException {
    if (this.query.isEmpty()) {
        throw new EmptyQueryException();
    }

    Elements snippets = getData(this.query).select(".st");
    if (snippets.size() <= 1) {
        return snippets.text();
    }
    throw new ManyResultsException();
}
From source file:gov.medicaid.screening.dao.impl.OIGDAOBean.java
/** * Parses the excluded provider profile details page. * * @param page the details page/*from w w w .j a va 2s . c o m*/ * @return the parsed license details * @throws ParsingException if the expected tags were not found */ private ProviderProfile parseProfile(Document page) throws ParsingException { ProviderProfile profile = new ProviderProfile(); // name User user = new User(); profile.setUser(user); user.setLastName(page.select("th:containsOwn(Last Name) + td").text()); user.setFirstName(page.select("th:containsOwn(First Name) + td").text()); // business String businessName = page.select("th:containsOwn(Entity) + td").text(); if (!"N/A".equals(businessName)) { Business business = new Business(); profile.setBusiness(business); business.setName(businessName); } // DOB Date dob = parseDate(page.select("th:has(acronym:containsOwn(DOB)) + td").text(), DATE_FORMAT); if (dob != null) { profile.setDob(dob); } // exclusion type ExclusionType exclusionType = new ExclusionType(); profile.setExclusionType(exclusionType); exclusionType.setName(page.select("th:containsOwn(Excl. Type) + td").text()); // specialty List<Specialty> specialties = new ArrayList<Specialty>(); Specialty specialty = new Specialty(); specialties.add(specialty); specialty.setName(page.select("th:containsOwn(Specialty) + td").text()); profile.setSpecialties(specialties); // address Elements addrElement = page.select("th:containsOwn(Address) + td"); String addr = addrElement.text(); Element addrNextRow = addrElement.parents().first().nextElementSibling(); if ("".equals(addrNextRow.select("th").text())) { addr += " " + addrNextRow.select("td").text(); } Address address = new Address(); address.setLocation(addr); profile.setAddresses(Arrays.asList(new Address[] { address })); Date date = parseDate(page.select("th:containsOwn(Excl. Date) + td").text(), DATE_FORMAT); if (date != null) { profile.setRequestEffectiveDate(date); } return profile; }
From source file:com.quarterfull.newsAndroid.NewsDetailFragment.java
public void onCreateContextMenu(ContextMenu menu, View v, ContextMenu.ContextMenuInfo menuInfo) { if (v instanceof WebView) { WebView.HitTestResult result = ((WebView) v).getHitTestResult(); if (result != null) { int type = result.getType(); Document htmldoc = Jsoup.parse(html); FragmentTransaction ft = getFragmentManager().beginTransaction(); if (type == WebView.HitTestResult.IMAGE_TYPE || type == WebView.HitTestResult.SRC_IMAGE_ANCHOR_TYPE) { String imageUrl = result.getExtra(); if (imageUrl.startsWith("http") || imageUrl.startsWith("file")) { URL mImageUrl; String imgtitle; String imgaltval; String imgsrcval; imgsrcval = imageUrl.substring(imageUrl.lastIndexOf('/') + 1, imageUrl.length()); Elements imgtag = htmldoc.getElementsByAttributeValueContaining("src", imageUrl); try { imgtitle = imgtag.first().attr("title"); } catch (NullPointerException e) { imgtitle = ""; }/*w w w . ja va2s . c o m*/ try { imgaltval = imgtag.first().attr("alt"); } catch (NullPointerException e) { imgaltval = ""; } try { mImageUrl = new URL(imageUrl); } catch (MalformedURLException e) { return; } String title = imgsrcval; int titleIcon = android.R.drawable.ic_menu_gallery; String text = (imgtitle.isEmpty()) ? imgaltval : imgtitle; // Create and show the dialog. DialogFragment newFragment = NewsDetailImageDialogFragment.newInstanceImage(title, titleIcon, text, mImageUrl); newFragment.show(ft, "menu_fragment_dialog"); } } else if (type == WebView.HitTestResult.SRC_ANCHOR_TYPE) { String url = result.getExtra(); URL mUrl; String text; try { Elements urltag = htmldoc.getElementsByAttributeValueContaining("href", url); text = urltag.text(); mUrl = new URL(url); } catch (MalformedURLException e) { return; } // Create and show the dialog. 
DialogFragment newFragment = NewsDetailImageDialogFragment.newInstanceUrl(text, mUrl.toString()); newFragment.show(ft, "menu_fragment_dialog"); } //else if (type == WebView.HitTestResult.EMAIL_TYPE) { } //else if (type == WebView.HitTestResult.GEO_TYPE) { } //else if (type == WebView.HitTestResult.PHONE_TYPE) { } //else if (type == WebView.HitTestResult.EDIT_TEXT_TYPE) { } } } }
From source file:Leitura.Jxr.java
public String leituraJxr() throws IOException { //mtodo para pegar os nomes dos mtodos declarados Elements elements = document.getElementsByTag("pre"); elements.select("a.jxr_linenumber").remove(); // elements.select("strong.jxr_keyword").remove(); // elements.select("span.jxr_string").remove(); // elements.select("em.jxr_comment").remove(); for (Element children : elements) { children.getElementsByClass("jxr_comment").remove(); children.getElementsByClass("jxr_javadoccomment").remove(); }//w w w . j a v a 2 s .com return elements.text(); // retorna o cdigo sem lixo }
From source file:com.qkj.qkjmanage.action.OilManageAction.java
public void getOilPrice() throws Exception { List<String> prices = new ArrayList<>(); //?//from w w w . j a v a2 s. co m try { Document doc = null; doc = Jsoup.connect("http://ny.gold600.com/qinghai.html").get(); Elements element1 = doc.getElementsByClass("JO_330q63"); Elements element2 = doc.getElementsByClass("JO_331q63"); Elements element3 = doc.getElementsByClass("JO_332q63"); prices.add(element1.text()); prices.add(element2.text()); prices.add(element3.text()); HttpServletRequest request = ServletActionContext.getRequest(); JSONArray jsonArray = JSONArray.fromObject(prices); HttpServletResponse response = ServletActionContext.getResponse(); response.setContentType("text/html;charset=UTF-8"); response.getWriter().print(jsonArray); } catch (Exception e) { log.error(this.getClass().getName() + "!getOilPrice ??:", e); throw new Exception(this.getClass().getName() + "!getOilPrice ??:", e); } }
From source file:abelymiguel.miralaprima.GetPrima.java
private HashMap<String, Float> getPrimaDataBloom(String country_code, String providerUrl, String indexName) { HashMap<String, Float> respuestaJson = new HashMap<String, Float>(); HashMap<String, Object> primaJson; Float prima_value;//from w w w . ja v a 2 s . c o m Float prima_delta; Float prima_percent; Document doc; try { doc = Jsoup.connect(providerUrl + indexName).get(); Element riskPremium = doc.select(".price").last(); // System.out.println("Prima: " + riskPremium.text()); prima_value = Float.valueOf(riskPremium.text().replace(".", "")).floatValue(); Elements riskPremiumsUp = doc.select(".trending_up"); Elements riskPremiumsDown = doc.select(".trending_down"); // System.out.println("Trending: " + riskPremiumsUp.text()); // System.out.println("Trending: " + riskPremiumsDown.text()); if (!riskPremiumsUp.text().equals("")) { String delta = riskPremiumsUp.text(); prima_delta = Float.valueOf(delta.substring(0, delta.indexOf(" ")).replace(",", "")).floatValue(); // System.out.println("Delta: " + prima_delta); String percent = riskPremiumsUp.text(); prima_percent = Float.valueOf(percent.substring(percent.indexOf(" ") + 1, percent.length() - 1)) .floatValue(); // System.out.println("Percent: " + prima_percent); } else if (!riskPremiumsDown.text().equals("")) { String delta = riskPremiumsDown.text(); prima_delta = Float.valueOf(delta.substring(0, delta.indexOf(" ")).replace(",", "")).floatValue(); prima_delta = prima_delta * -1; // System.out.println("Delta: " + prima_delta); String percent = riskPremiumsDown.text(); prima_percent = Float.valueOf(percent.substring(percent.indexOf(" ") + 1, percent.length() - 1)) .floatValue(); prima_percent = prima_percent * -1; // System.out.println("Percent: " + prima_percent); } else { prima_delta = 0f; prima_percent = 0f; } respuestaJson.put("prima_value", prima_value); respuestaJson.put("prima_delta", prima_delta); respuestaJson.put("prima_percent", prima_percent); if (isSameDay(country_code)) { this.updatePrimaInDB(prima_value, 
prima_delta, prima_percent, this.getLatestPrimaIdFromDB(country_code)); } else { this.storePrimaInDB(prima_value, prima_delta, prima_percent, country_code); } } catch (Exception ex) { Logger.getLogger(GetPrima.class.getName()).log(Level.SEVERE, null, ex); primaJson = getLatestPrimaFromDB(country_code); respuestaJson.put("prima_value", (Float) primaJson.get("prima_value")); respuestaJson.put("prima_delta", (Float) primaJson.get("prima_delta")); respuestaJson.put("prima_percent", (Float) primaJson.get("prima_percent")); } return respuestaJson; }
From source file:net.kevxu.purdueassist.course.ScheduleDetail.java
private ScheduleDetailEntry parseDocument(Document document) throws HtmlParseException, CourseNotFoundException, ResultNotMatchException { ScheduleDetailEntry entry = new ScheduleDetailEntry(term, crn); Elements tableElements = document.getElementsByAttributeValue("summary", "This table is used to present the detailed class information."); if (!tableElements.isEmpty()) { for (Element tableElement : tableElements) { // get basic info for selected course Element tableBasicInfoElement = tableElement.getElementsByClass("ddlabel").first(); if (tableBasicInfoElement != null) { setBasicInfo(entry, tableBasicInfoElement.text()); } else { throw new HtmlParseException("Basic info element empty."); }/*from www. jav a 2 s . c om*/ // get detailed course info Element tableDetailedInfoElement = tableElement.getElementsByClass("dddefault").first(); if (tableDetailedInfoElement != null) { // process seat info Elements tableSeatDetailElements = tableDetailedInfoElement.getElementsByAttributeValue( "summary", "This layout table is used to present the seating numbers."); if (tableSeatDetailElements.size() == 1) { Element tableSeatDetailElement = tableSeatDetailElements.first(); Elements tableSeatDetailEntryElements = tableSeatDetailElement.getElementsByTag("tbody") .first().children(); if (tableSeatDetailEntryElements.size() == 3 || tableSeatDetailEntryElements.size() == 4) { setSeats(entry, tableSeatDetailEntryElements.get(1).text()); setWaitlistSeats(entry, tableSeatDetailEntryElements.get(2).text()); if (tableSeatDetailEntryElements.size() == 4) { setCrosslistSeats(entry, tableSeatDetailEntryElements.get(3).text()); } } else { throw new HtmlParseException("Seat detail entry elements size not 3. We have " + tableSeatDetailEntryElements.size() + "."); } } else { throw new HtmlParseException( "Seat detail elements size not 1. 
We have " + tableSeatDetailElements.size() + "."); } // remove the seat info from detailed info tableSeatDetailElements.remove(); // remaining information setRemainingInfo(entry, tableDetailedInfoElement.html()); } else { throw new HtmlParseException("Detailed info element empty."); } } } else { // test empty Elements informationElements = document.getElementsByAttributeValue("summary", "This layout table holds message information"); if (!informationElements.isEmpty() && informationElements.text().contains("No detailed class information found")) { throw new CourseNotFoundException(informationElements.text()); } else { throw new HtmlParseException( "Course table not found, but page does not contain message stating no course found."); } } return entry; }