List of usage examples for the html() method of org.jsoup.select.Elements
public String html()
From source file:org.brunocvcunha.taskerbox.impl.jobs.LinkedInJobSeeker.java
private boolean handleJob(JSONObject job) throws JSONException, ClientProtocolException, IOException, URISyntaxException { if (job.getBoolean("isApplied")) { return false; }/*w ww . j av a2 s .c om*/ long jobId = job.getLong("id"); if (!this.openIds.contains(jobId)) { this.openIds.add(jobId); // uniqueCount++; } else { return false; } String jobTitle = job.getString("fmt_jobTitle").replaceAll("</?B>", ""); if (!this.externalApply && job.has("sourceDomain")) { logInfo(log, jobId + " - " + jobTitle + " - " + job.getString("sourceDomain") + " --> ignored [external]"); String sourceDomain = job.getString("sourceDomain"); if (!sourceDomain.contains("jobvite") && !sourceDomain.contains("ziprecruiter")) { return true; } } String jobEmployer = job.getString("fmt_companyName"); String jobUrl = "https://www.linkedin.com/jobs2/view/" + jobId; if (alreadyPerformedAction(jobUrl)) { return true; } String location = ""; if (job.has("fmt_location")) { location = job.getString("fmt_location"); } String headline = jobUrl + " - " + location + " - " + jobTitle + " - " + jobEmployer; if (job.has("sourceDomain")) { String sourceDomain = job.getString("sourceDomain"); if (this.externalApply && (sourceDomain.contains("empregocerto.uol.com.br") || sourceDomain.contains("jobomas.com") || sourceDomain.contains("curriculum.com.br"))) { logInfo(log, "-- Ignored [externalApply - domain " + sourceDomain + "] " + headline); addAlreadyPerformedAction(jobUrl); return true; } } if (!considerTitle(jobTitle)) { logInfo(log, "-- Ignored [title] " + headline); addAlreadyPerformedAction(jobUrl); return true; } try { FileWriter out = new FileWriter(new File(this.tempDir + "\\job-db\\_titles.txt"), true); out.write(jobTitle + "\r\n"); out.close(); } catch (Exception e) { } if (!considerEmployer(jobEmployer)) { logInfo(log, "-- Ignored [employer] " + headline); addAlreadyPerformedAction(jobUrl); return true; } if (!considerLocation(location)) { logInfo(log, "-- Ignored [location] " + headline); 
addAlreadyPerformedAction(jobUrl); return true; } HttpEntity jobEntity = TaskerboxHttpBox.getInstance().getEntityForURL(jobUrl); String jobResult = TaskerboxHttpBox.getInstance().readResponseFromEntity(jobEntity); Document jobDocument = Jsoup.parse(jobResult); Elements elDescription = jobDocument.select("div.description-section").select("div.rich-text"); Elements elSkills = jobDocument.select("div.skills-section").select("div.rich-text"); // FileWriter out = new FileWriter(new File(tempDir + "\\job-db\\" + jobId + ".txt")); // out.write(elDescription.text() + "\r\n"); // out.write(elSkills.text()); // out.close(); if (!this.externalApply && !jobResult.contains("onsite-apply")) { logInfo(log, "-- Ignored [onsite apply] " + headline); addAlreadyPerformedAction(jobUrl); try { Thread.sleep(5000L); } catch (InterruptedException e) { e.printStackTrace(); } return true; } if (!considerVisaDescription(elDescription.html()) || !considerVisaDescription(elSkills.html())) { logInfo(log, "-- Ignored [visa] " + headline); addAlreadyPerformedAction(jobUrl); return true; } if (!considerExperienceDescription(elDescription.html()) || !considerExperienceDescription(elSkills.html())) { logInfo(log, "-- Ignored [exp] " + headline); addAlreadyPerformedAction(jobUrl); return true; } ScorerResult result = LinkedInJobDBComparer.getScore(elDescription.html() + " - " + elSkills.html()); if (result.getScore() < this.requiredScore) { logInfo(log, "-- Ignored [scorer] " + result.getScore() + " - " + result.getMatches() + " - " + headline); addAlreadyPerformedAction(jobUrl); return true; } headline = headline + " - " + result.getMatches(); logInfo(log, headline); logInfo(log, elDescription.html()); if (this.actionCount++ == this.maxCount) { this.setPaused(true); return false; } performUnique(jobUrl); try { Thread.sleep(5000L); } catch (InterruptedException e) { e.printStackTrace(); } return true; }
From source file:cn.scujcc.bug.bitcoinplatformandroid.fragment.QuotationInformationFragment.java
/**
 * Fetches the page at {@code url} and stores two pieces of it on {@code news}: the {@code src}
 * attribute reported for the {@code .entry-content img} selection and the html of the
 * {@code .entry-content} selection.
 *
 * @param url address of the page to download
 * @param news object that receives the image address and the content markup
 * @throws Exception if the page cannot be retrieved
 */
public void getImageAndContent(String url, News news) throws Exception {
    final Document page = Jsoup.connect(url).get();

    // Image first, then content, so the setters fire in the same order as before.
    news.setImage(page.select(".entry-content img").attr("src"));
    news.setContent(page.select(".entry-content").html());
}
From source file:com.normalexception.app.rx8club.fragment.pm.PrivateMessageViewFragment.java
/** * Format the user post by removing the vb style quotes and the * duplicate youtube links//from www . j a va2 s. c o m * @param innerPost The element that contains the inner post * @return The formatted string */ private String formatUserPost(Elements innerPost) { // Remove the duplicate youtube links (this is caused by a plugin on // the forum that embeds youtube videos automatically) for (Element embedded : innerPost.select("div[id^=ame_doshow_post_]")) embedded.remove(); // Remove the vbulletin quotes String upost = Utils.reformatQuotes(innerPost.html()); return upost; }
From source file:org.jtotus.network.NordnetConnect.java
public boolean authenticated() { String loginPage = null;//from w w w . j a v a 2 s . c o m if (connector == null) { System.err.printf("Failure connector is empty\n"); return false; } loginPage = connector.getPage(_PORTFOLIO_URL_); if (loginPage == null) { System.err.printf("Failure unable to fetch portfolio\n"); return false; } Document doc = Jsoup.parse(loginPage); Elements elements = doc.select("title"); //FIXME: UTF-8 for httpclient! if (elements.html().equals("Yleisnäkymä - Nordnet")) { return true; } else { System.err.printf("Failure in match for : %s \n", elements.html()); } return false; }
From source file:org.jtotus.network.NordnetConnect.java
private StockTick parseAuthenticatedStream(String infoPage, String stockName) { StockTick tick = null;/*from ww w.j a v a 2 s .c om*/ Document doc = Jsoup.parse(infoPage); Elements elements = doc.select("tr[class=first]"); doc = Jsoup.parse(elements.html()); elements = doc.select("td"); if (elements.size() != 15) { //not authenticated 13 return tick; } tick = new StockTick(); tick.setStockName(stockName); Iterator<Element> iter = elements.iterator(); for (int count = 0; iter.hasNext(); count++) { Element elem = iter.next(); log.info("Element value (" + count + "):" + elem.text()); switch (count) { case 3: if (!elem.text().equalsIgnoreCase("OMX Helsinki")) { System.err.printf("Data corruption in broker site? :%s for: %s\n", elem.text(), stockName); return null; } break; case 4://latest price tick.setLatestPrice(Double.parseDouble(elem.text().replace(",", ".").trim())); break; case 5://latest buy tick.setLatestBuy(Double.parseDouble(elem.text().replace(",", ".").trim())); break; case 6://latest sell tick.setLatestSell(Double.parseDouble(elem.text().replace(",", ".").trim())); break; case 7://latest Highest tick.setLatestHighest(Double.parseDouble(elem.text().replace(",", ".").trim())); break; case 8://latest Lowest tick.setLatestLowest(Double.parseDouble(elem.text().replace(",", ".").trim())); break; case 11://latest Lowest tick.setVolume(Double.parseDouble(elem.text().replace(" ", "").trim())); break; case 12://latest Lowest tick.setTradesSum(Double.parseDouble(elem.text().replace(" ", "").trim())); break; case 14://Time tick.setTime(elem.text().trim()); break; //TODO:currency and time default: log.info("Not matched(" + count + ") = " + elem.text()); break; } } log.info("StockTick:" + tick.toString()); return tick; }
From source file:org.jtotus.network.NordnetConnect.java
private StockTick parseNonAuthenticatedStream(String infoPage, String stockName) { StockTick tick = null;/*from w w w.ja v a 2 s. c o m*/ Document doc = Jsoup.parse(infoPage); Elements elements = doc.select("tr[class=first]"); doc = Jsoup.parse(elements.html()); elements = doc.select("td"); if (elements.size() != 13) { //not authenticated 13 return tick; } tick = new StockTick(); tick.setStockName(stockName); Iterator<Element> iter = elements.iterator(); for (int count = 0; iter.hasNext(); count++) { Element elem = iter.next(); System.out.printf("Non-Auth Element value (%d):%s for:%s\n", count, elem.text(), stockName); switch (count) { case 1: if (!elem.text().equalsIgnoreCase("OMX Helsinki")) { System.err.printf("Data corruption in broker site? :%s for: %s\n", elem.text(), stockName); return null; } break; case 2://latest price tick.setLatestPrice(Double.parseDouble(elem.text().replace(",", ".").trim())); break; case 3://latest buy tick.setLatestBuy(Double.parseDouble(elem.text().replace(",", ".").trim())); break; case 4://latest sell tick.setLatestSell(Double.parseDouble(elem.text().replace(",", ".").trim())); break; case 5://latest Highest tick.setLatestHighest(Double.parseDouble(elem.text().replace(",", ".").trim())); break; case 6://latest Lowest tick.setLatestLowest(Double.parseDouble(elem.text().replace(",", ".").trim())); break; case 9://Volume tick.setVolume(Double.parseDouble(elem.text().replace(" ", "").trim())); break; case 10://Trade Sum tick.setTradesSum(Double.parseDouble(elem.text().replace(" ", "").trim())); break; case 12://Time tick.setTime(elem.text().trim()); break; //TODO:currency and time default: System.out.printf("Not matched(%d) = %s \n", count, elem.text()); break; } } System.out.printf("StockTick:%s\n", tick.toString()); return tick; }
From source file:org.opennms.protocols.http.collector.HttpCollectionHandler.java
@Override protected void fillCollectionSet(String urlString, Request request, CollectionAgent agent, XmlCollectionSet collectionSet, XmlSource source) throws Exception { XmlCollectionResource nodeResource = new XmlSingleInstanceCollectionResource(agent); Document doc = getJsoupDocument(urlString, request); for (XmlGroup group : source.getXmlGroups()) { LOG.debug("fillCollectionSet: getting resources for XML group {} using selector {}", group.getName(), group.getResourceXpath()); Date timestamp = getTimeStamp(doc, group); Elements elements = doc.select(group.getResourceXpath()); LOG.debug("fillCollectionSet: {} => {}", group.getResourceXpath(), elements); String resourceName = getResourceName(elements, group); LOG.debug("fillCollectionSet: processing XML resource {}", resourceName); XmlCollectionResource collectionResource; if (group.getResourceType().equalsIgnoreCase(CollectionResource.RESOURCE_TYPE_NODE)) { collectionResource = nodeResource; } else {// w w w . ja v a 2 s .co m collectionResource = getCollectionResource(agent, resourceName, group.getResourceType(), timestamp); } LOG.debug("fillCollectionSet: processing resource {}", collectionResource); AttributeGroupType attribGroupType = new AttributeGroupType(group.getName(), group.getIfType()); for (XmlObject object : group.getXmlObjects()) { Elements el = elements.select(object.getXpath()); XmlCollectionAttributeType attribType = new XmlCollectionAttributeType(object, attribGroupType); collectionResource.setAttributeValue(attribType, el == null ? null : el.html()); } processXmlResource(collectionResource, attribGroupType); collectionSet.getCollectionResources().add(collectionResource); } }
From source file:org.opennms.protocols.http.collector.HttpCollectionHandler.java
/** * Gets the resource name./* w w w .j a va2s .c om*/ * * @param elements the JSoup elements * @param group the group * @return the resource name */ private String getResourceName(Elements elements, XmlGroup group) { // Processing multiple-key resource name. if (group.hasMultipleResourceKey()) { List<String> keys = new ArrayList<String>(); for (String key : group.getXmlResourceKey().getKeyXpathList()) { LOG.debug("getResourceName: getting key for resource's name using selector {}", key); Elements el = elements.select(key); if (el != null) { keys.add(el.html()); } } return StringUtils.join(keys, "_"); } // If key-xpath doesn't exist or not found, a node resource will be assumed. if (group.getKeyXpath() == null) { return "node"; } // Processing single-key resource name. LOG.debug("getResourceName: getting key for resource's name using selector {}", group.getKeyXpath()); Elements el = elements.select(group.getKeyXpath()); return el == null ? null : el.html(); }
From source file:org.opennms.protocols.http.collector.HttpCollectionHandler.java
/**
 * Gets the time stamp.
 *
 * <p>The html of the group's timestamp selection is parsed with the group's timestamp format,
 * or with {@code yyyy-MM-dd HH:mm:ss} when the group does not define one.
 *
 * @param doc the JSoup document
 * @param group the group
 * @return the time stamp, or {@code null} when the group has no timestamp selector or the
 *         selected text cannot be parsed with the pattern
 */
protected Date getTimeStamp(Document doc, XmlGroup group) {
    if (group.getTimestampXpath() == null) {
        return null;
    }

    String pattern;
    if (group.getTimestampFormat() == null) {
        pattern = "yyyy-MM-dd HH:mm:ss";
    } else {
        pattern = group.getTimestampFormat();
    }
    LOG.debug(
            "getTimeStamp: retrieving custom timestamp to be used when updating RRDs using selector {} and pattern {}",
            group.getTimestampXpath(), pattern);

    Elements selection = doc.select(group.getTimestampXpath());
    if (selection == null) {
        return null;
    }

    String value = selection.html();
    try {
        return DateTimeFormat.forPattern(pattern).parseDateTime(value).toDate();
    } catch (Exception e) {
        // Any formatter or parse failure is reported and treated as "no timestamp".
        LOG.warn("getTimeStamp: can't convert custom timetime {} using pattern {}", value, pattern);
        return null;
    }
}
From source file:org.opens.rules.doc.utils.ruledesign.extractor.ExtractRuleDesignHtmlCode.java
/** * Before using it please set the FOLDER variable with the path where you * want to create your extract html files. * * @param args the command line arguments *//* ww w . j av a 2s.com*/ public static void main(String[] args) { // first boucle for is for the theme number for (int i = 1; i < MAX_THEME_NUMBER; i++) { // second boucle for is for the critere number for (int j = 1; j < MAX_CRITERE_NUMBER; j++) { // third boucle for is for the test number for (int k = 1; k < MAX_TEST_NUMBER; k++) { URL url = null; try { Connection connection = Jsoup.connect(PREFIX_URL_TO_REFERENTIAL + i + "-" + j + "-" + k); Connection.Response resp = connection.response(); if (resp.statusCode() != 404) { url = new URL(PREFIX_URL_TO_REFERENTIAL + i + "-" + j + "-" + k); Document doc = Jsoup.parse(url, 4000); System.out.println(doc.title()); Elements summary = doc.select(".content.clear-block"); FileUtils.writeStringToFile( new File(FOLDER + "/RuleDesign/Rule-" + i + "-" + j + "-" + k + ".html"), summary.html()); } } catch (MalformedURLException ex) { System.out.println("URL MAL FORMEE"); } catch (IOException ex) { if (url != null) { System.out.println("URL 404 : " + url.toString()); } else { System.out.println("EMPTY URL"); } } } } } }