List of usage examples for org.jsoup.nodes.Element.attr
public String attr(String attributeKey)
From source file:io.seldon.importer.articles.dynamicextractors.FirstElementAttrValueDynamicExtractor.java
@Override public String extract(AttributeDetail attributeDetail, String url, Document articleDoc) throws Exception { String attrib_value = null;//from ww w .j a va 2 s.c om if ((attributeDetail.extractor_args != null) && (attributeDetail.extractor_args.size() >= 2)) { String cssSelector = attributeDetail.extractor_args.get(0); Element element = articleDoc.select(cssSelector).first(); if (StringUtils.isNotBlank(cssSelector)) { int arg_count = 0; for (String value_name : attributeDetail.extractor_args) { if (arg_count > 0) { // skip the first one, its the cssSelector if (element != null && element.attr(value_name) != null) { attrib_value = element.attr(value_name); if (StringUtils.isNotBlank(attrib_value)) { break; } } } arg_count++; } } } return attrib_value; }
From source file:io.seldon.importer.articles.dynamicextractors.FirstElementAttrListValueDynamicExtractor.java
@Override public String extract(AttributeDetail attributeDetail, String url, Document articleDoc) throws Exception { String attrib_value = null;/*from ww w . j a v a 2s . co m*/ if ((attributeDetail.extractor_args != null) && (attributeDetail.extractor_args.size() >= 2)) { String cssSelector = attributeDetail.extractor_args.get(0); Element element = articleDoc.select(cssSelector).first(); if (StringUtils.isNotBlank(cssSelector)) { int arg_count = 0; for (String value_name : attributeDetail.extractor_args) { if (arg_count > 0) { // skip the first one, its the cssSelector if (element != null && element.attr(value_name) != null) { String rawList = element.attr(value_name); if (StringUtils.isNotBlank(rawList)) { String[] parts = rawList.split(","); for (int i = 0; i < parts.length; i++) { parts[i] = parts[i].trim().toLowerCase(); } attrib_value = StringUtils.join(parts, ','); break; } } } arg_count++; } } } return attrib_value; }
From source file:io.seldon.importer.articles.dynamicextractors.FirstElementAttrUppercaseValueDynamicExtractor.java
@Override public String extract(AttributeDetail attributeDetail, String url, Document articleDoc) throws Exception { String attrib_value = null;/*from w w w. ja v a 2 s. com*/ if ((attributeDetail.extractor_args != null) && (attributeDetail.extractor_args.size() >= 2)) { String cssSelector = attributeDetail.extractor_args.get(0); Element element = articleDoc.select(cssSelector).first(); if (StringUtils.isNotBlank(cssSelector)) { int arg_count = 0; for (String value_name : attributeDetail.extractor_args) { if (arg_count > 0) { // skip the first one, its the cssSelector if (element != null && element.attr(value_name) != null) { attrib_value = element.attr(value_name); if (StringUtils.isNotBlank(attrib_value)) { break; } } } arg_count++; } } } attrib_value = (attrib_value != null) ? attrib_value.toUpperCase() : attrib_value; return attrib_value; }
From source file:org.jresponder.message.MessageRefImpl.java
@Override public synchronized void refresh() throws InvalidMessageException { try {//from ww w. j a v a 2 s .co m logger().debug("MessageRef - Starting refresh for: {}", file.getCanonicalPath()); // set timestamp fileContentsTimestamp = file.lastModified(); StringBuilder myStringBuilder = new StringBuilder(); char[] buf = new char[4096]; BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")); int len; while ((len = r.read(buf)) > 0) { myStringBuilder.append(buf, 0, len); } r.close(); fileContents = myStringBuilder.toString(); document = Jsoup.parse(fileContents, "UTF-8"); propMap = new HashMap<String, String>(); Elements myMetaTagElements = document.select("meta"); if (myMetaTagElements == null || myMetaTagElements.isEmpty()) { throw new InvalidMessageException("No meta tags found in file: " + file.getCanonicalPath()); } for (Element myPropElement : myMetaTagElements) { String myName = myPropElement.attr("name"); String myValue = myPropElement.attr("content"); propMap.put(myName, myValue); } // bodies are not read at all until message generation time } catch (IOException e) { throw new InvalidMessageException(e); } // debug dump if (logger().isDebugEnabled()) { for (String myKey : propMap.keySet()) { logger().debug(" property -- {}: {}", myKey, (propMap.get(myKey))); } } }
From source file:com.isomorphic.maven.packaging.Downloads.java
/** * Interrogates the remote server for a list of hyperlinks matching the given distribution's {@link Distribution#getRemoteIndexFilter() filter}. * /*w ww. ja v a 2 s.c o m*/ * @param dist the build in which some files should exist * @return a String array of html href attributes * @throws MojoExecutionException */ private String[] list(Distribution dist) throws MojoExecutionException { HttpGet request = new HttpGet(dist.getRemoteIndex()); HttpResponse response; try { LOGGER.debug("Requesting list of files from {}{}", DOMAIN, dist.getRemoteIndex()); response = httpClient.execute(host, request); } catch (Exception e) { throw new MojoExecutionException("Error issuing GET request for bundle at '" + request + "'", e); } Document doc; try { String html = EntityUtils.toString(response.getEntity()); doc = Jsoup.parse(html); doc.outputSettings().prettyPrint(true); } catch (Exception e) { throw new MojoExecutionException("Error processing response from '" + request + "'", e); } List<String> result = new ArrayList<String>(); Elements links = doc.select(dist.getRemoteIndexFilter()); for (Element element : links) { String href = element.attr("href"); result.add(href); } if (result.isEmpty()) { String msg = String.format("No downloads found at '%s%s'. Response from server: \n\n%s\n", DOMAIN, dist.getRemoteIndex(), doc.html()); LOGGER.warn(msg); } return result.toArray(new String[0]); }
From source file:me.vertretungsplan.parser.DSBLightParser.java
private void parsePreProgram(SubstitutionSchedule v, Map<String, String> referer, Element iframe) throws IOException, CredentialInvalidException, JSONException { Pattern regex = Pattern.compile("location\\.href=\"([^\"]*)\""); // PreProgram.aspx String response2 = httpGet(iframe.attr("src"), ENCODING, referer); Matcher matcher = regex.matcher(response2); if (matcher.find()) { // Program.aspx String url = matcher.group(1); parseProgram(url, v, referer);//w w w . j ava 2 s . co m } else { throw new IOException("URL nicht gefunden"); } }
From source file:com.astamuse.asta4d.web.form.flow.base.BasicFormFlowSnippetTrait.java
default Renderer hideCascadeFormTemplateDOM(Class<?> subFormType) { return Renderer.create(":root", new ElementSetter() { @Override//from w w w. j av a2 s . c o m public void set(Element elem) { String style = elem.attr("style"); if (StringUtils.isEmpty(style)) { style = "display:none"; } else { if (!style.endsWith(";")) { style += ";"; } style += "display:none"; } elem.attr("style", style); } }); }
From source file:com.astamuse.asta4d.web.form.field.impl.AbstractRadioAndCheckboxRenderer.java
protected Renderer addDefaultAlternativeDom(final String editTargetSelector, final List<String> valueList) { final List<String> duplicatorRefList = new LinkedList<>(); final List<String> idList = new LinkedList<>(); ClosureVarRef<Boolean> editTargetExists = new ClosureVarRef<Boolean>(false); Renderer renderer = Renderer.create(editTargetSelector, new ElementSetter() { @Override/* w ww.ja va 2s . co m*/ public void set(Element elem) { String duplicatorRef = elem.attr(RadioPrepareRenderer.DUPLICATOR_REF_ATTR); if (StringUtils.isNotEmpty(duplicatorRef)) { duplicatorRefList.add(duplicatorRef); } idList.add(elem.id()); editTargetExists.set(true); } }); /* renderer.add(":root", () -> { return Renderer.create().addDebugger("current root for addDefaultAlternativeDom"); }); */ renderer.add(":root", new Renderable() { @Override public Renderer render() { // skip create display alternative DOM if edit target does not exist. if (editTargetExists.get()) { // it is OK } else { return Renderer.create(); } String attachTargetSelector; if (duplicatorRefList.size() > 0) { attachTargetSelector = SelectorUtil.attr(RadioPrepareRenderer.DUPLICATOR_REF_ID_ATTR, duplicatorRefList.get(duplicatorRefList.size() - 1)); } else if (idList.size() == 0) { String msg = "The target item[%s] must have id specified."; throw new IllegalArgumentException(String.format(msg, editTargetSelector)); } else { attachTargetSelector = SelectorUtil.id(idList.get(idList.size() - 1)); } return new Renderer(attachTargetSelector, new ElementTransformer(null) { @Override public Element invoke(Element elem) { GroupNode group = new GroupNode(); Element editClone = elem.clone(); group.appendChild(editClone); for (String v : valueList) { String nonNullString = retrieveDisplayStringFromStoredOptionValueMap(editTargetSelector, v); group.appendChild(createAlternativeDisplayElement(nonNullString)); } return group; }// invoke });// new renderer }// render() });// renderable return renderer; }
From source file:org.sbs.goodcrawler.extractor.selector.PageElementSelector.java
@SuppressWarnings("unchecked") @Override// www. j ava2 s. com public HashMap<String, Object> getContent() throws ExtractException { if (null != content && !newDoc) { return content; } // ??document if (!newDoc) { return content; } // ?documentSelector List<String> urls = Lists.newArrayList(); if (super.document != null) { Elements elements = super.document.select(value); if (elements.isEmpty()) return null; switch ($Attr) { case text: for (Element e : elements) { urls.add(e.text()); } break; default: for (Element e : elements) { urls.add(e.attr(attr)); } break; } } if (urls.size() > 0) { content = Maps.newHashMap(); for (String url : urls) { Document doc = null; PageFetchResult result = null; try { WebURL webUrl = new WebURL(); webUrl.setURL(url); result = FetchForeman.fetcher.fetchHeader(webUrl); // ?? int statusCode = result.getStatusCode(); if (statusCode == CustomFetchStatus.PageTooBig) { return null; } if (statusCode != HttpStatus.SC_OK) { return null; } else { Page page = new Page(webUrl); if (!result.fetchContent(page)) { return null; } if (!parser.parse(page, webUrl.getURL())) { return null; } doc = Jsoup.parse(new String(page.getContentData(), page.getContentCharset()), urlUtils.getBaseUrl(page.getWebURL().getURL())); } } catch (IOException e) { e.printStackTrace(); throw new ExtractException(e.getMessage()); } finally { if (result != null) result.discardContentIfNotConsumed(); } if (selectors != null) for (AbstractElementCssSelector<?> selector : selectors) { if (selector instanceof FileElementCssSelector) { Map<String, Object> m = ((FileElementCssSelector) selector).setResult(content) .setDocument(doc).getContentMap(); if ((null == m || m.size() == 0) && selector.isRequired()) { return null; } else { if (null != m && m.size() > 0) content = MapUtils.mager(content, (HashMap<String, Object>) m); } } else { Map<String, Object> m = selector.setDocument(doc).getContentMap(); if ((null == m || m.size() == 0) && selector.isRequired()) { return null; } else { if 
(null != m && m.size() > 0) content = MapUtils.mager(content, (HashMap<String, Object>) m); } } } } return content; } newDoc = false; return null; }