List of usage examples for org.jsoup.nodes.Document#select
public Elements select(String cssQuery)
From source file:web.analyzer.utils.Utils.java
public List<Heading> docHeadingsProcess(Document doc) { List<Heading> headingList = new ArrayList<Heading>(); int level = 0; Elements eles = doc.select("*"); for (Element ele : eles) { level++;//from ww w . ja v a2 s. c o m if (HEADING_TAG.contains(ele.tagName())) { headingList.add(new Heading(ele.tagName(), ele.html(), level)); } if (ele.children().size() == 0) { level = 0; continue; } else { eles = ele.children(); } } return headingList; }
From source file:br.ufsc.das.gtscted.shibbauth.ShibAuthenticationActivity.java
/** Called when the activity is first created. */ @Override/* w w w.jav a 2 s.c o m*/ public void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); setContentView(R.layout.idp_selection); loginButton = (Button) findViewById(R.id.loginButton); backButton = (Button) findViewById(R.id.backButton); usernameTxt = (EditText) findViewById(R.id.usernameTxt); passwordTxt = (EditText) findViewById(R.id.passwordTxt); idpSpinner = (Spinner) findViewById(R.id.idpSpinner); //Configura o ArrayAdapter do spinner. ArrayAdapter<CharSequence> spinnerArrayAdapter; spinnerArrayAdapter = new ArrayAdapter(this, android.R.layout.simple_spinner_item); spinnerArrayAdapter.setDropDownViewResource(android.R.layout.simple_spinner_dropdown_item); idpSpinner.setAdapter(spinnerArrayAdapter); // Obtm os parmetros passados pela Activity anterior // (no caso, a pgina do WAYF como uma String e o // nico cookie da Connection usada anteriormente) Bundle bundle = this.getIntent().getExtras(); String wayfHtml = bundle.getString("html_source"); final String wayfLocation = bundle.getString("wayf_location"); final SerializableCookie receivedCookie = (SerializableCookie) bundle.getSerializable("cookie"); //Obtm todos os tags de nome "option", que correspondem // aos IdPs, da pgina do WAYF. Document wayfDocument = Jsoup.parse(wayfHtml); idpElements = wayfDocument.select("option"); //Popula o spinner com os nomes dos IdPs encontrados. for (Element idpElement : idpElements) { String idpName = idpElement.text(); spinnerArrayAdapter.add(idpName); } // Obtm o caminho para o qual deve ser passado o IdP do usurio. formElements = wayfDocument.select("form"); for (Element formElement : formElements) { if (formElement.attr("id").equals("IdPList")) { wayfActionPath = formElement.attr("action"); } } loginButton.setOnClickListener(new View.OnClickListener() { @Override public void onClick(View v) { // Obtm a URL correspondente ao idP selecionado no spinner. 
int selectedIdpPosition = idpSpinner.getSelectedItemPosition(); Element selectedIdp = idpElements.get(selectedIdpPosition); selectedIdpUrl = selectedIdp.attr("value"); try { // Obtm os campos "username" e "password" fornecidos // pelo usurio e necessrios para a autenticao. String username = usernameTxt.getText().toString(); String password = passwordTxt.getText().toString(); // Cria um novo objeto Connection, e adiciona o // cookie passado pela Activity anterior. Connection connection = new Connection(); BasicClientCookie newCookie = new BasicClientCookie(receivedCookie.getName(), receivedCookie.getValue()); newCookie.setDomain(receivedCookie.getDomain()); connection.addCookie(newCookie); // Tenta realizar a autenticao no IdP selecionado. O resultado corresponde // pgina para a qual o cliente redirecionado em caso de autenticao // bem-sucedida. String authResult = connection.authenticate(wayfLocation, wayfActionPath, selectedIdpUrl, username, password); // Apenas mostra o recurso que o usurio queria acessar (neste caso, mostra a pg. de // "Homologao de atributos"). Intent newIntent = new Intent(ShibAuthenticationActivity.this.getApplicationContext(), TestActivity.class); Bundle bundle = new Bundle(); bundle.putString("arg", authResult); newIntent.putExtras(bundle); startActivity(newIntent); } catch (IOException e) { String message = "IOException - problema na conexo"; Toast toast = Toast.makeText(getApplicationContext(), message, Toast.LENGTH_LONG); toast.show(); } catch (Exception e) { String message = "Exception - problema na autenticao"; Toast toast = Toast.makeText(getApplicationContext(), message, Toast.LENGTH_LONG); toast.show(); } } }); backButton.setOnClickListener(new View.OnClickListener() { @Override public void onClick(View v) { finish(); } }); }
From source file:co.foxdev.foxbot.utils.Utils.java
/**
 * Fetches the given URL and builds a one-line, colour-coded summary of it.
 *
 * <p>The summary is, in order of precedence: an error line for a status other than 200, 301
 * or 302; a content-type/size line for non-HTML responses; a site-specific line for YouTube
 * watch pages and Reddit comment pages; otherwise a generic title line.
 *
 * <p>The size is reported as {@code "Unknown"} when the {@code Content-Length} header is
 * missing or not a number, and the content type as {@code "Unknown"} when the response
 * carries none. The body is only parsed once the response is known to be HTML.
 *
 * @param stringToParse the URL to fetch
 * @param sender the user whose (munged) nick is echoed in the summary
 * @return the formatted summary, or {@code null} when an {@code IllegalArgumentException}
 *         is raised (ignored silently) or any other exception occurs (logged)
 */
public static String parseChatUrl(String stringToParse, User sender) {
    try {
        Connection conn = Jsoup.connect(stringToParse);
        conn.followRedirects(true)
                .userAgent(
                        "FoxBot // http://foxbot.foxdev.co // Seeing this? It means your web address was posted on IRC and FoxBot is getting page info (title, size, content type) to send to the channel. Nothing to worry about.")
                .timeout(3000)
                .maxBodySize(100000)
                .ignoreContentType(true);
        Connection.Response response = conn.execute();

        String nick = munge(sender.getNick());

        // A malformed or very large Content-Length must not abort the whole lookup:
        // NumberFormatException is an IllegalArgumentException and would be swallowed below.
        String size = "Unknown";
        String lengthHeader = response.header("Content-Length");
        if (lengthHeader != null) {
            try {
                size = (Long.parseLong(lengthHeader.trim()) / 1024) + "kb";
            } catch (NumberFormatException ignored) {
                // Keep "Unknown".
            }
        }

        // Strip any parameters (e.g. "; charset=...") from the content type.
        String rawContentType = response.contentType();
        String contentType;
        if (rawContentType == null) {
            contentType = "Unknown";
        } else {
            int paramStart = rawContentType.indexOf(';');
            contentType = paramStart >= 0 ? rawContentType.substring(0, paramStart) : rawContentType;
        }

        int status = response.statusCode();
        if (status != 200 && status != 302 && status != 301) {
            return colourise(String.format("(%s's URL) &cError: &r%s %s ", nick, status,
                    response.statusMessage()));
        }

        if (!contentType.contains("html")) {
            return colourise(String.format("(%s's URL) &2Content Type: &r%s &2Size: &r%s", nick,
                    contentType, size));
        }

        Document doc = response.parse();
        String pageTitle = doc.title();
        String title = pageTitle == null || pageTitle.isEmpty() ? "No title found" : pageTitle;

        // In the two site-specific branches a missing element makes first() return null;
        // the resulting exception is handled by the generic catch below.
        if (stringToParse.matches("^https?://(www\\.)?youtube\\.com/watch.*")) {
            title = doc.select("span#eow-title").first().text();
            String views = doc.select("span.watch-view-count").first().text();
            String likes = doc.select("span.likes-count").first().text();
            String dislikes = doc.select("span.dislikes-count").first().text();
            String uploader = doc.select("a.g-hovercard.yt-uix-sessionlink.yt-user-name").first().text();
            return colourise(String.format(
                    "(%s's URL) &2Title: &r%s &2Uploader: &r%s &2Views: &r%s &2Rating: &a%s&r/&c%s",
                    nick, StringEscapeUtils.unescapeHtml4(title), uploader, views, likes, dislikes));
        }

        if (stringToParse.matches("^https?://(www\\.)?reddit\\.com/r/.*/comments/.*")) {
            String poster = doc.select("p.tagline").select("a.author").text().split(" ")[0];
            String comments = doc.select("a.comments").first().text().split(" ")[0];
            String likes = doc.select("span.upvotes").first().text().split(" ")[0];
            String dislikes = doc.select("span.downvotes").first().text().split(" ")[0];
            return colourise(String.format(
                    "(%s's URL) &2Title: &r%s &2Poster: &r%s &2Comments: &r%s &2Rating: &6%s&r/&9%s",
                    nick, StringEscapeUtils.unescapeHtml4(title), poster, comments, likes, dislikes));
        }

        return colourise(String.format("(%s's URL) &2Title: &r%s &2Content Type: &r%s &2Size: &r%s",
                nick, StringEscapeUtils.unescapeHtml4(title), contentType, size));
    } catch (IllegalArgumentException ignored) {
        // Deliberately silent. NOTE(review): presumably covers text that is not a valid
        // URL, which Jsoup.connect rejects - confirm.
    } catch (Exception ex) {
        foxbot.getLogger().error("Error occurred while parsing URL", ex);
    }
    return null;
}
From source file:de.fzi.ALERT.actor.MessageObserver.NotificationObserver.JMSNotificationParser.java
/**
 * Parses a raw notification payload and hands it to the parser matching its event name.
 *
 * <p>Line breaks are stripped from the payload before it is parsed. The event name is the
 * text of the {@code ns1|eventName} selection.
 *
 * @param msgString the raw message text
 * @return the message produced by the matching parser, or {@code null} when no branch
 *         assigns one or an exception occurs (only its message is printed to stdout)
 */
public Message parseJsoup(String msgString) {
    Message parsed = null;
    try {
        String flattened = msgString.replaceAll("[\n\r]", "");
        org.jsoup.nodes.Document document = Jsoup.parse(flattened);
        String event = document.select("ns1|eventName").text();

        if (event.equals(IdentityRecommendation)) {
            parsed = parseIdentityRecommendation(document);
        } else if (event.equals(IdentityVerification)) {
            parsed = parseIdentityVerification(document);
        } else if (event.equals(IssueRecommendation)) {
            parsed = parseIssueRecommendation(document);
        } else if (event.equals(SimilarIssueRssfeed)) {
            // NOTE(review): unlike every other branch, nothing is assigned here, so the
            // method returns null for this event - confirm this is intended.
            parseIssueAlertIusse(document);
        } else if (event.equals(SimilarIssues)) {
            parsed = parseSimilarIssues(document);
        }
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
    return parsed;
}
From source file:org.brunocvcunha.taskerbox.impl.jobs.DiceJobSeeker.java
private boolean handleJob(String jobTitle, String jobEmployer, String location, String jobUrl) throws JSONException, ClientProtocolException, IOException, URISyntaxException { if (alreadyPerformedAction(jobUrl)) { return true; }// w ww .j ava 2s . c o m String headline = jobUrl + " - " + location + " - " + jobTitle + " - " + jobEmployer; System.out.println(headline); if (!considerTitle(jobTitle)) { logInfo(log, "-- Ignored [title] " + headline); addAlreadyPerformedAction(jobUrl); return true; } if (!considerEmployer(jobEmployer)) { logInfo(log, "-- Ignored [employer] " + headline); addAlreadyPerformedAction(jobUrl); return true; } if (!considerLocation(location)) { logInfo(log, "-- Ignored [location] " + headline); addAlreadyPerformedAction(jobUrl); return true; } HttpEntity jobEntity = TaskerboxHttpBox.getInstance().getEntityForURL(jobUrl); String jobResult = TaskerboxHttpBox.getInstance().readResponseFromEntity(jobEntity); Document jobDocument = Jsoup.parse(jobResult); Elements elDescription = jobDocument.select("div.job_description"); if (elDescription.isEmpty()) { elDescription = jobDocument.select("div#detailDescription"); } /* * if (!jobDocument.html().contains("ApplyOnlineUrl: ''") && * !jobDocument.html().contains("ApplyOnlineUrl: 'http://my.monster.com") && !externalApply) { * logInfo(log, "-- Ignored [externalApply] " + headline); addAlreadyPerformedAction(jobUrl); * return true; } */ if (!considerVisaDescription(elDescription.html())) { logInfo(log, "-- Ignored [visa] " + headline); addAlreadyPerformedAction(jobUrl); return true; } if (!considerExperienceDescription(elDescription.html())) { log.info("-- Ignored [exp] " + headline); addAlreadyPerformedAction(jobUrl); return true; } ScorerResult result = LinkedInJobDBComparer.getScore(elDescription.html()); if (result.getScore() < this.requiredScore) { logInfo(log, "-- Ignored [scorer] " + result.getScore() + " - " + result.getMatches() + " - " + headline); addAlreadyPerformedAction(jobUrl); return true; } 
headline = headline + " - " + result.getMatches(); logInfo(log, "Open --> " + headline); // logInfo(log, elDescription.html()); performUnique(jobUrl); try { Thread.sleep(5000L); } catch (InterruptedException e) { e.printStackTrace(); } return true; }
From source file:cd.go.contrib.elasticagents.dockerswarm.elasticagent.executors.AgentStatusReportExecutorTest.java
/**
 * Asserts that the service-details tab of the rendered document mentions the service's id,
 * name and container image.
 *
 * @param service the service whose details are expected in the tab
 * @param document the parsed status report
 */
private void assertServiceDetails(Service service, Document document) {
    // Filter by the ng-show attribute with an attribute selector. Elements.attr(key, value)
    // sets the attribute on every match and returns the same elements, so it never
    // narrowed the selection to the intended tab.
    final Elements serviceDetails =
            document.select(".tab-content[ng-show=\"currenttab == 'service-details'\"]");
    final String serviceDetailsText = serviceDetails.text();

    assertThat(serviceDetailsText, containsString(service.id()));
    assertThat(serviceDetailsText, containsString(service.spec().name()));
    assertThat(serviceDetailsText, containsString(service.spec().taskTemplate().containerSpec().image()));
}
From source file:com.ignorelist.kassandra.steam.scraper.HtmlTagLoader.java
@Override public GameInfo load(Long gameId, EnumSet<TagType> types) { GameInfo gameInfo = new GameInfo(); gameInfo.setId(gameId);// ww w. j av a 2 s . c om try { if (!types.isEmpty()) { InputStream inputStream = cache.get(gameId.toString()); try { Document document = Jsoup.parse(inputStream, Charsets.UTF_8.name(), buildPageUrl(gameId)); Elements appName = document.select("div.apphub_AppName"); Element nameElement = Iterables.getFirst(appName, null); if (null != nameElement && null != nameElement.text()) { gameInfo.setName(nameElement.text().trim()); } Elements appIconElements = document.select("div.apphub_AppIcon img"); gameInfo.setIcon(getSrcUri(appIconElements)); Elements headerImageElements = document.select("img.game_header_image_full"); gameInfo.setHeaderImage(getSrcUri(headerImageElements)); final SetMultimap<TagType, String> tags = gameInfo.getTags(); if (types.contains(TagType.CATEGORY)) { Elements categories = document.select("div#category_block a.name"); copyText(categories, tags.get(TagType.CATEGORY)); } if (types.contains(TagType.GENRE)) { Elements genres = document.select("div.details_block a[href*=/genre/]"); copyText(genres, tags.get(TagType.GENRE)); } if (types.contains(TagType.USER)) { Elements userTags = document.select("a.app_tag"); copyText(Iterables.filter(userTags, Predicates.not(DISPLAY_NONE_PREDICATE)), tags.get(TagType.USER)); copyText(Iterables.filter(userTags, DISPLAY_NONE_PREDICATE), tags.get(TagType.USER_HIDDEN)); } if (types.contains(TagType.VR)) { Elements vrSupport = document .select("div.game_area_details_specs a.name[href*=#vrsupport="); copyText(vrSupport, tags.get(TagType.VR)); } } finally { IOUtils.closeQuietly(inputStream); } } } catch (ExecutionException ex) { Logger.getLogger(HtmlTagLoader.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(HtmlTagLoader.class.getName()).log(Level.SEVERE, null, ex); } return gameInfo; }
From source file:me.vertretungsplan.parser.SVPlanParser.java
@NotNull SubstitutionSchedule parseSVPlanSchedule(List<Document> docs) throws IOException, JSONException { SubstitutionSchedule v = SubstitutionSchedule.fromData(scheduleData); for (Document doc : docs) { if (doc.select(".svp").size() > 0) { for (Element svp : doc.select(".svp")) { parseSvPlanDay(v, svp, doc); }//from w w w.j a v a 2s.c o m } else if (doc.select(".Trennlinie").size() > 0) { Element div = new Element(Tag.valueOf("div"), ""); for (Node node : doc.body().childNodesCopy()) { if (node instanceof Element && ((Element) node).hasClass("Trennlinie") && div.select("table").size() > 0) { parseSvPlanDay(v, div, doc); div = new Element(Tag.valueOf("div"), ""); } else { div.appendChild(node); } } parseSvPlanDay(v, div, doc); } else { parseSvPlanDay(v, doc, doc); } } v.setClasses(getAllClasses()); v.setTeachers(getAllTeachers()); return v; }
From source file:cd.go.contrib.elasticagents.dockerswarm.elasticagent.executors.AgentStatusReportExecutorTest.java
/**
 * Asserts that the value of the {@code textarea} selection taken from the
 * {@code .service-logs} selection equals the expected logs.
 *
 * @param document the parsed status report
 * @param logs the expected log text
 */
private void assertServiceLog(Document document, String logs) {
    final Elements logSections = document.select(".service-logs");
    final Elements logTextAreas = logSections.select("textarea");
    final String actualLogs = logTextAreas.val();
    assertThat(actualLogs, is(logs));
}
From source file:org.brunocvcunha.taskerbox.impl.jobs.MonsterJobSeeker.java
private boolean handleJob(String jobTitle, String jobEmployer, String location, String jobUrl) throws JSONException, ClientProtocolException, IOException, URISyntaxException { if (alreadyPerformedAction(jobUrl)) { return true; }// ww w .j a v a 2s . com String headline = jobUrl + " - " + location + " - " + jobTitle + " - " + jobEmployer; if (!considerTitle(jobTitle)) { logInfo(log, "-- Ignored [title] " + headline); addAlreadyPerformedAction(jobUrl); return true; } if (!considerEmployer(jobEmployer)) { logInfo(log, "-- Ignored [employer] " + headline); addAlreadyPerformedAction(jobUrl); return true; } if (!considerLocation(location)) { logInfo(log, "-- Ignored [location] " + headline); addAlreadyPerformedAction(jobUrl); return true; } try { Thread.sleep(1000L); } catch (InterruptedException e) { e.printStackTrace(); } HttpEntity jobEntity = TaskerboxHttpBox.getInstance().getEntityForURL(jobUrl); String jobResult = TaskerboxHttpBox.getInstance().readResponseFromEntity(jobEntity); Document jobDocument = Jsoup.parse(jobResult); Elements elDescription = jobDocument.select("div#jobBodyContent"); if (!jobDocument.html().contains("ApplyOnlineUrl: ''") && !jobDocument.html().contains("ApplyOnlineUrl: 'http://my.monster.com") && !this.externalApply) { logInfo(log, "-- Ignored [externalApply] " + headline); addAlreadyPerformedAction(jobUrl); return true; } if (!considerVisaDescription(elDescription.html())) { logInfo(log, "-- Ignored [visa] " + headline); addAlreadyPerformedAction(jobUrl); return true; } if (!considerExperienceDescription(elDescription.html())) { logInfo(log, "-- Ignored [exp] " + headline); addAlreadyPerformedAction(jobUrl); return true; } ScorerResult result = LinkedInJobDBComparer.getScore(elDescription.html()); if (result.getScore() < this.requiredScore) { logInfo(log, "-- Ignored [scorer] " + result.getScore() + " - " + result.getMatches() + " - " + headline); addAlreadyPerformedAction(jobUrl); return true; } headline = headline + " - " + 
result.getMatches(); logInfo(log, "Open --> " + headline); // logInfo(log, elDescription.html()); performUnique(jobUrl); try { Thread.sleep(5000L); } catch (InterruptedException e) { e.printStackTrace(); } return true; }