List of usage examples for org.apache.commons.lang3 StringUtils strip
public static String strip(final String str)
Strips whitespace from the start and end of a String.
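This is similar to #trim(String), but it removes whitespace as defined by Character.isWhitespace(char), whereas trim removes control characters (char <= 32).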
From source file:io.seldon.importer.articles.ItemAttributesImporter.java
public static Map<String, String> getAttributes(String url, String existingCategory) { ItemProcessResult itemProcessResult = new ItemProcessResult(); itemProcessResult.client_item_id = url; itemProcessResult.extraction_status = "EXTRACTION_FAILED"; logger.info("Trying to get attributes for " + url); Map<String, String> attributes = null; String title = ""; String category = ""; String subCategory = ""; String img_url = ""; String description = ""; String tags = ""; String leadtext = ""; String link = ""; String publishDate = ""; String domain = ""; try {//from w w w .jav a 2 s . c o m long now = System.currentTimeMillis(); long timeSinceLastRequest = now - lastUrlFetchTime; if (timeSinceLastRequest < minFetchGapMsecs) { long timeToSleep = minFetchGapMsecs - timeSinceLastRequest; logger.info( "Sleeping " + timeToSleep + "msecs as time since last fetch is " + timeSinceLastRequest); Thread.sleep(timeToSleep); } Document articleDoc = Jsoup.connect(url).userAgent("SeldonBot/1.0").timeout(httpGetTimeout).get(); lastUrlFetchTime = System.currentTimeMillis(); //get IMAGE URL if (StringUtils.isNotBlank(imageCssSelector)) { Element imageElement = articleDoc.select(imageCssSelector).first(); if (imageElement != null && imageElement.attr("content") != null) { img_url = imageElement.attr("content"); } if (imageElement != null && StringUtils.isBlank(img_url)) { img_url = imageElement.attr("src"); } if (imageElement != null && StringUtils.isBlank(img_url)) { img_url = imageElement.attr("href"); } } if (StringUtils.isBlank(img_url) && StringUtils.isNotBlank(defImageUrl)) { logger.info("Setting image to default: " + defImageUrl); img_url = defImageUrl; } img_url = StringUtils.strip(img_url); //get TITLE if (StringUtils.isNotBlank(titleCssSelector)) { Element titleElement = articleDoc.select(titleCssSelector).first(); if ((titleElement != null) && (titleElement.attr("content") != null)) { title = titleElement.attr("content"); } // if still blank get from text instead if 
(StringUtils.isBlank(title) && (titleElement != null)) { title = titleElement.text(); } } //get LEAD TEXT if (StringUtils.isNotBlank(leadTextCssSelector)) { Element leadElement = articleDoc.select(leadTextCssSelector).first(); if (leadElement != null && leadElement.attr("content") != null) { leadtext = leadElement.attr("content"); } } //get publish date if (StringUtils.isNotBlank(publishDateCssSelector)) { //2013-01-21T10:40:55Z Element pubElement = articleDoc.select(publishDateCssSelector).first(); if (pubElement != null && pubElement.attr("content") != null) { String pubtext = pubElement.attr("content"); SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ENGLISH); Date result = null; try { result = df.parse(pubtext); } catch (ParseException e) { logger.info("Failed to parse date withUTC format " + pubtext); } //try a simpler format df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH); try { result = df.parse(pubtext); } catch (ParseException e) { logger.info("Failed to parse date " + pubtext); } if (result != null) publishDate = dateFormatter.format(result); else logger.error("Failed to parse date " + pubtext); } } //get Link if (StringUtils.isNotBlank(linkCssSelector)) { Element linkElement = articleDoc.select(linkCssSelector).first(); if (linkElement != null && linkElement.attr("content") != null) { link = linkElement.attr("content"); } } //get CONTENT if (StringUtils.isNotBlank(textCssSelector)) { Element descriptionElement = articleDoc.select(textCssSelector).first(); if (descriptionElement != null) description = Jsoup.parse(descriptionElement.html()).text(); } //get TAGS Set<String> tagSet = AttributesImporterUtils.getTags(articleDoc, tagsCssSelector, title); if (tagSet.size() > 0) tags = CollectionTools.join(tagSet, ","); //get CATEGORY - client specific if (StringUtils.isNotBlank(categoryCssSelector)) { Element categoryElement = 
articleDoc.select(categoryCssSelector).first(); if (categoryElement != null && categoryElement.attr("content") != null) { category = categoryElement.attr("content"); if (StringUtils.isNotBlank(category)) category = category.toUpperCase(); } } else if (StringUtils.isNotBlank(categoryClassPrefix)) { String className = "io.seldon.importer.articles.category." + categoryClassPrefix + "CategoryExtractor"; Class<?> clazz = Class.forName(className); Constructor<?> ctor = clazz.getConstructor(); CategoryExtractor extractor = (CategoryExtractor) ctor.newInstance(); category = extractor.getCategory(url, articleDoc); } //get Sub CATEGORY - client specific if (StringUtils.isNotBlank(subCategoryCssSelector)) { Element subCategoryElement = articleDoc.select(subCategoryCssSelector).first(); if (subCategoryElement != null && subCategoryElement.attr("content") != null) { subCategory = subCategoryElement.attr("content"); if (StringUtils.isNotBlank(subCategory)) subCategory = category.toUpperCase(); } } else if (StringUtils.isNotBlank(subCategoryClassPrefix)) { String className = "io.seldon.importer.articles.category." 
+ subCategoryClassPrefix + "SubCategoryExtractor"; Class<?> clazz = Class.forName(className); Constructor<?> ctor = clazz.getConstructor(); CategoryExtractor extractor = (CategoryExtractor) ctor.newInstance(); subCategory = extractor.getCategory(url, articleDoc); } // Get domain if (domainIsNeeded) { domain = getDomain(url); } if ((StringUtils.isNotBlank(title) && (imageNotNeeded || StringUtils.isNotBlank(img_url)) && (categoryNotNeeded || StringUtils.isNotBlank(category)) && (!domainIsNeeded || StringUtils.isNotBlank(domain)))) { attributes = new HashMap<String, String>(); attributes.put(TITLE, title); if (StringUtils.isNotBlank(category)) attributes.put(CATEGORY, category); if (StringUtils.isNotBlank(subCategory)) attributes.put(SUBCATEGORY, subCategory); if (StringUtils.isNotBlank(link)) attributes.put(LINK, link); if (StringUtils.isNotBlank(leadtext)) attributes.put(LEAD_TEXT, leadtext); if (StringUtils.isNotBlank(img_url)) attributes.put(IMG_URL, img_url); if (StringUtils.isNotBlank(tags)) attributes.put(TAGS, tags); attributes.put(CONTENT_TYPE, VERIFIED_CONTENT_TYPE); if (StringUtils.isNotBlank(description)) attributes.put(DESCRIPTION, description); if (StringUtils.isNotBlank(publishDate)) attributes.put(PUBLISH_DATE, publishDate); if (StringUtils.isNotBlank(domain)) attributes.put(DOMAIN, domain); System.out.println("Item: " + url + "; Category: " + category + " SubCategory: " + subCategory); itemProcessResult.extraction_status = "EXTRACTION_SUCCEEDED"; } else { logger.warn("Failed to get needed attributes for article " + url); logger.warn("[title=" + title + ", img_url=" + img_url + ", category=" + category + ", domain=" + domain + "]"); } { // check for failures for the log result if (StringUtils.isBlank(title)) { itemProcessResult.attrib_failure_list = itemProcessResult.attrib_failure_list + ((StringUtils.isBlank(itemProcessResult.attrib_failure_list)) ? 
"" : ",") + "title"; } if (!imageNotNeeded && StringUtils.isBlank(img_url)) { itemProcessResult.attrib_failure_list = itemProcessResult.attrib_failure_list + ((StringUtils.isBlank(itemProcessResult.attrib_failure_list)) ? "" : ",") + "img_url"; } if (!categoryNotNeeded && StringUtils.isBlank(category)) { itemProcessResult.attrib_failure_list = itemProcessResult.attrib_failure_list + ((StringUtils.isBlank(itemProcessResult.attrib_failure_list)) ? "" : ",") + "category"; } } } catch (Exception e) { logger.error("Article: " + url + ". Attributes import FAILED", e); itemProcessResult.error = e.toString(); } AttributesImporterUtils.logResult(logger, itemProcessResult); return attributes; }
From source file:com.neophob.sematrix.core.properties.ApplicationConfigurationHelper.java
/** * Gets the color format from cfg./* w w w.j a v a 2s . co m*/ * * @return the color format from cfg */ private int getColorFormatFromCfg() { colorFormat = new ArrayList<ColorFormat>(); String rawConfig = config.getProperty(ConfigConstant.CFG_PANEL_COLOR_ORDER); if (StringUtils.isNotBlank(rawConfig)) { for (String s : rawConfig.split(ConfigConstant.DELIM)) { try { ColorFormat cf = ColorFormat.valueOf(StringUtils.strip(s)); colorFormat.add(cf); } catch (Exception e) { LOG.log(Level.WARNING, FAILED_TO_PARSE, s); } } } return colorFormat.size(); }
From source file:com.neophob.sematrix.core.properties.ApplicationConfigurationHelper.java
/** * /*from w ww.j a va 2 s. com*/ * @return */ private int getPanelOrderFromCfg(int totalDevices) { panelOrder = new LinkedList<Integer>(); String rawConfig = config.getProperty(ConfigConstant.PIXELINVADERS_PANEL_ORDER); if (StringUtils.isNotBlank(rawConfig)) { for (String s : rawConfig.split(ConfigConstant.DELIM)) { try { Integer order = Integer.parseInt(StringUtils.strip(s)); //sanity check if (order >= totalDevices) { LOG.log(Level.WARNING, ConfigConstant.PIXELINVADERS_PANEL_ORDER + ": Error parsing, " + "order value " + order + " >= total panels " + totalDevices + ". Settings igored!"); panelOrder.clear(); return 0; } panelOrder.add(order); } catch (Exception e) { LOG.log(Level.WARNING, FAILED_TO_PARSE, ConfigConstant.PIXELINVADERS_PANEL_ORDER); } } } return panelOrder.size(); }
From source file:com.neophob.sematrix.core.properties.ApplicationConfigurationHelper.java
/**
 * Rebuilds the {@code i2cAddr} list from the two Rainbowduino V2 row properties.
 *
 * <p>Every delimited entry is stripped and converted with {@link Integer#decode(String)}.
 * Each row-1 entry increments {@code devicesInRow1}; each row-2 entry increments
 * {@code devicesInRow2}. When row 1 is configured the device resolution is set to 8x8.
 *
 * @return the number of addresses stored in {@code i2cAddr}
 */
private int parseI2cAddress() {
    i2cAddr = new ArrayList<Integer>();

    final String firstRow = config.getProperty(ConfigConstant.RAINBOWDUINO_V2_ROW1);
    if (StringUtils.isNotBlank(firstRow)) {
        this.deviceXResolution = 8;
        this.deviceYResolution = 8;

        final String[] addresses = firstRow.split(ConfigConstant.DELIM);
        for (int idx = 0; idx < addresses.length; idx++) {
            final Integer address = Integer.decode(StringUtils.strip(addresses[idx]));
            i2cAddr.add(address);
            devicesInRow1++;
        }
    }

    final String secondRow = config.getProperty(ConfigConstant.RAINBOWDUINO_V2_ROW2);
    if (StringUtils.isNotBlank(secondRow)) {
        final String[] addresses = secondRow.split(ConfigConstant.DELIM);
        for (int idx = 0; idx < addresses.length; idx++) {
            final Integer address = Integer.decode(StringUtils.strip(addresses[idx]));
            i2cAddr.add(address);
            devicesInRow2++;
        }
    }

    return i2cAddr.size();
}
From source file:com.neophob.sematrix.core.properties.ApplicationConfigurationHelper.java
/**
 * Rebuilds the {@code rainbowduinoV3SerialDevices} list from the two Rainbowduino V3 row
 * properties.
 *
 * <p>Every delimited entry is stripped of surrounding whitespace and stored as-is. Each
 * row-1 entry increments {@code devicesInRow1}; each row-2 entry increments
 * {@code devicesInRow2}. When row 1 is configured the device resolution is set to 8x8.
 *
 * @return the number of entries stored in {@code rainbowduinoV3SerialDevices}
 */
private int parseRainbowduinoV3Config() {
    this.rainbowduinoV3SerialDevices = new ArrayList<String>();

    final String firstRow = this.config.getProperty(ConfigConstant.RAINBOWDUINO_V3_ROW1);
    if (StringUtils.isNotBlank(firstRow)) {
        this.deviceXResolution = 8;
        this.deviceYResolution = 8;

        final String[] names = firstRow.split(ConfigConstant.DELIM);
        for (int idx = 0; idx < names.length; idx++) {
            final String deviceName = StringUtils.strip(names[idx]);
            this.rainbowduinoV3SerialDevices.add(deviceName);
            this.devicesInRow1++;
        }
    }

    final String secondRow = this.config.getProperty(ConfigConstant.RAINBOWDUINO_V3_ROW2);
    if (StringUtils.isNotBlank(secondRow)) {
        final String[] names = secondRow.split(ConfigConstant.DELIM);
        for (int idx = 0; idx < names.length; idx++) {
            final String deviceName = StringUtils.strip(names[idx]);
            this.rainbowduinoV3SerialDevices.add(deviceName);
            this.devicesInRow2++;
        }
    }

    return this.rainbowduinoV3SerialDevices.size();
}
From source file:com.neophob.sematrix.core.properties.ApplicationConfigurationHelper.java
/** * Parses the art net devices./*from w w w.j av a2 s .c om*/ * * @return the int */ private int parseArtNetDevices() { artNetDevice = new ArrayList<DeviceConfig>(); //minimal ip length 1.1.1.1 if (StringUtils.length(getArtNetIp()) > 6 && parseOutputXResolution() > 0 && parseOutputYResolution() > 0) { this.deviceXResolution = parseOutputXResolution(); this.deviceYResolution = parseOutputYResolution(); String value = config.getProperty(ConfigConstant.ARTNET_ROW1); if (StringUtils.isNotBlank(value)) { devicesInRow1 = 0; devicesInRow2 = 0; for (String s : value.split(ConfigConstant.DELIM)) { try { DeviceConfig cfg = DeviceConfig.valueOf(StringUtils.strip(s)); artNetDevice.add(cfg); devicesInRow1++; } catch (Exception e) { LOG.log(Level.WARNING, FAILED_TO_PARSE, s); } } } value = config.getProperty(ConfigConstant.ARTNET_ROW2); if (StringUtils.isNotBlank(value)) { for (String s : value.split(ConfigConstant.DELIM)) { try { DeviceConfig cfg = DeviceConfig.valueOf(StringUtils.strip(s)); artNetDevice.add(cfg); devicesInRow2++; } catch (Exception e) { LOG.log(Level.WARNING, FAILED_TO_PARSE, s); } } } } return artNetDevice.size(); }
From source file:com.neophob.sematrix.core.properties.ApplicationConfigurationHelper.java
/** * Parses the e131 devices./*from w ww . j av a2 s .c o m*/ * * @return the int */ private int parseE131Devices() { e131Device = new ArrayList<DeviceConfig>(); if (StringUtils.length(getE131Ip()) > 6 && parseOutputXResolution() > 0 && parseOutputYResolution() > 0) { this.deviceXResolution = parseOutputXResolution(); this.deviceYResolution = parseOutputYResolution(); String value = config.getProperty(ConfigConstant.E131_ROW1); if (StringUtils.isNotBlank(value)) { devicesInRow1 = 0; devicesInRow2 = 0; for (String s : value.split(ConfigConstant.DELIM)) { try { DeviceConfig cfg = DeviceConfig.valueOf(StringUtils.strip(s)); e131Device.add(cfg); devicesInRow1++; } catch (Exception e) { LOG.log(Level.WARNING, FAILED_TO_PARSE, s); } } } value = config.getProperty(ConfigConstant.E131_ROW2); if (StringUtils.isNotBlank(value)) { for (String s : value.split(ConfigConstant.DELIM)) { try { DeviceConfig cfg = DeviceConfig.valueOf(StringUtils.strip(s)); e131Device.add(cfg); devicesInRow2++; } catch (Exception e) { LOG.log(Level.WARNING, FAILED_TO_PARSE, s); } } } } return e131Device.size(); }
From source file:com.neophob.sematrix.core.properties.ApplicationConfigurationHelper.java
/** * Parses tpm2net devices/* w ww .j av a 2 s. c om*/ * @return */ private int parseTpm2NetDevices() { tpm2netDevice = new ArrayList<DeviceConfig>(); if (StringUtils.isNotBlank(getTpm2NetIpAddress()) && parseOutputXResolution() > 0 && parseOutputYResolution() > 0) { this.deviceXResolution = parseOutputXResolution(); this.deviceYResolution = parseOutputYResolution(); String value = config.getProperty(ConfigConstant.TPM2NET_ROW1); if (StringUtils.isNotBlank(value)) { devicesInRow1 = 0; devicesInRow2 = 0; for (String s : value.split(ConfigConstant.DELIM)) { try { DeviceConfig cfg = DeviceConfig.valueOf(StringUtils.strip(s)); tpm2netDevice.add(cfg); devicesInRow1++; } catch (Exception e) { LOG.log(Level.WARNING, FAILED_TO_PARSE, s); } } } value = config.getProperty(ConfigConstant.TPM2NET_ROW2); if (StringUtils.isNotBlank(value)) { for (String s : value.split(ConfigConstant.DELIM)) { try { DeviceConfig cfg = DeviceConfig.valueOf(StringUtils.strip(s)); tpm2netDevice.add(cfg); devicesInRow2++; } catch (Exception e) { LOG.log(Level.WARNING, FAILED_TO_PARSE, s); } } } } return tpm2netDevice.size(); }
From source file:org.dbgl.util.searchengine.WebSearchEngine.java
/**
 * Converts a chunk of HTML into plain text.
 *
 * <p>The three line-break tag variants are replaced with newlines, two entities are replaced
 * by hand, the result is stripped of surrounding whitespace, and the remaining entities are
 * decoded with {@code StringEscapeUtils.unescapeHtml4}.
 *
 * @param htmlChunk the HTML fragment to convert
 * @return the stripped, unescaped text
 */
protected static String unescapeHtml(final String htmlChunk) {
    String result = replaceTag(HTML_BR_UNCLOSED, "\n", htmlChunk);
    result = replaceTag(HTML_BR_CLOSED, "\n", result);
    result = replaceTag(HTML_BR_CLOSED_ALT, "\n", result);
    // NOTE(review): these two calls previously replaced " " with " " and "'" with "'",
    // i.e. they were no-ops; the search strings look like entity names that were decoded
    // somewhere along the way. Restored to the entities - confirm against upstream.
    // "&nbsp;" is replaced by hand so it becomes a regular space (and is stripped below)
    // rather than U+00A0; "&apos;" is not part of the HTML 4 entity set.
    result = replaceTag("&nbsp;", " ", result);
    result = replaceTag("&apos;", "'", result);
    return StringEscapeUtils.unescapeHtml4(StringUtils.strip(result));
}
From source file:org.efaps.cli.EQLHandler.java
/** * Gets the stmt./*from w w w .java2 s . c o m*/ * * @return the stmt * @throws IOException Signals that an I/O exception has occurred. */ protected String getStmt() throws IOException { final StringBuilder eql = EQLObserver.get().getEql(); while (!StringUtils.endsWithAny(eql, ";", "; ", "; ", "; ")) { eql.append(this.input.in().withPromt("\\").readLine()); } return StringUtils.removeEnd(StringUtils.strip(eql.toString()), ";"); }