Example usage for org.apache.commons.lang3 StringUtils strip

List of usage examples for org.apache.commons.lang3 StringUtils strip

Introduction

On this page you can find example usages of org.apache.commons.lang3 StringUtils strip.

Prototype

public static String strip(final String str) 

Source Link

Document

Strips whitespace from the start and end of a String.

This is similar to #trim(String), but removes whitespace as defined by Character.isWhitespace(char), whereas trim removes control characters (char <= 32).

Usage

From source file:io.seldon.importer.articles.ItemAttributesImporter.java

/**
 * Fetches the article at {@code url} and extracts its item attributes using the
 * configured CSS selectors.
 *
 * <p>Requests are throttled: if less than {@code minFetchGapMsecs} have passed since
 * {@code lastUrlFetchTime}, the calling thread sleeps for the remainder before fetching.
 * The outcome (status, list of missing attributes, error text) is collected in an
 * {@code ItemProcessResult} and handed to {@code AttributesImporterUtils.logResult}.
 *
 * @param url the article URL to fetch; also used as the client item id of the logged result
 * @param existingCategory not read by this method
 * @return a map of the extracted attributes, or {@code null} if a needed attribute
 *         (the title and, depending on configuration, image, category or domain) is
 *         blank or if any exception was thrown while fetching or extracting
 */
public static Map<String, String> getAttributes(String url, String existingCategory) {
    ItemProcessResult itemProcessResult = new ItemProcessResult();
    itemProcessResult.client_item_id = url;
    // Pessimistic default; only switched to success once all needed attributes are present.
    itemProcessResult.extraction_status = "EXTRACTION_FAILED";

    logger.info("Trying to get attributes for " + url);
    Map<String, String> attributes = null;
    String title = "";
    String category = "";
    String subCategory = "";
    String img_url = "";
    String description = "";
    String tags = "";
    String leadtext = "";
    String link = "";
    String publishDate = "";
    String domain = "";
    try {
        // Throttle: keep at least minFetchGapMsecs between two consecutive fetches.
        long now = System.currentTimeMillis();
        long timeSinceLastRequest = now - lastUrlFetchTime;
        if (timeSinceLastRequest < minFetchGapMsecs) {
            long timeToSleep = minFetchGapMsecs - timeSinceLastRequest;
            logger.info(
                    "Sleeping " + timeToSleep + "msecs as time since last fetch is " + timeSinceLastRequest);
            Thread.sleep(timeToSleep);
        }
        Document articleDoc = Jsoup.connect(url).userAgent("SeldonBot/1.0").timeout(httpGetTimeout).get();
        lastUrlFetchTime = System.currentTimeMillis();
        //get IMAGE URL: try the "content", then "src", then "href" attribute of the first match
        if (StringUtils.isNotBlank(imageCssSelector)) {
            Element imageElement = articleDoc.select(imageCssSelector).first();
            if (imageElement != null && imageElement.attr("content") != null) {
                img_url = imageElement.attr("content");
            }
            if (imageElement != null && StringUtils.isBlank(img_url)) {
                img_url = imageElement.attr("src");
            }
            if (imageElement != null && StringUtils.isBlank(img_url)) {
                img_url = imageElement.attr("href");
            }

        }

        if (StringUtils.isBlank(img_url) && StringUtils.isNotBlank(defImageUrl)) {
            logger.info("Setting image to default: " + defImageUrl);
            img_url = defImageUrl;
        }
        img_url = StringUtils.strip(img_url);

        //get TITLE
        if (StringUtils.isNotBlank(titleCssSelector)) {
            Element titleElement = articleDoc.select(titleCssSelector).first();
            if ((titleElement != null) && (titleElement.attr("content") != null)) {
                title = titleElement.attr("content");
            }

            // if still blank get from text instead
            if (StringUtils.isBlank(title) && (titleElement != null)) {
                title = titleElement.text();
            }
        }

        //get LEAD TEXT
        if (StringUtils.isNotBlank(leadTextCssSelector)) {
            Element leadElement = articleDoc.select(leadTextCssSelector).first();
            if (leadElement != null && leadElement.attr("content") != null) {
                leadtext = leadElement.attr("content");
            }
        }

        //get publish date
        if (StringUtils.isNotBlank(publishDateCssSelector)) {
            //2013-01-21T10:40:55Z
            Element pubElement = articleDoc.select(publishDateCssSelector).first();
            if (pubElement != null && pubElement.attr("content") != null) {
                String pubtext = pubElement.attr("content");
                SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ENGLISH);
                Date result = null;
                try {
                    result = df.parse(pubtext);
                } catch (ParseException e) {
                    logger.info("Failed to parse date withUTC format " + pubtext);
                }
                // Only fall back to the simpler format when the first one did not match;
                // the two patterns are mutually exclusive ('T' vs. space separator), so
                // retrying after a success could only produce a misleading failure log.
                if (result == null) {
                    df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH);
                    try {
                        result = df.parse(pubtext);
                    } catch (ParseException e) {
                        logger.info("Failed to parse date " + pubtext);
                    }
                }

                if (result != null)
                    publishDate = dateFormatter.format(result);
                else
                    logger.error("Failed to parse date " + pubtext);
            }
        }

        //get Link
        if (StringUtils.isNotBlank(linkCssSelector)) {
            Element linkElement = articleDoc.select(linkCssSelector).first();
            if (linkElement != null && linkElement.attr("content") != null) {
                link = linkElement.attr("content");
            }
        }

        //get CONTENT
        if (StringUtils.isNotBlank(textCssSelector)) {
            Element descriptionElement = articleDoc.select(textCssSelector).first();
            if (descriptionElement != null)
                description = Jsoup.parse(descriptionElement.html()).text();
        }

        //get TAGS
        Set<String> tagSet = AttributesImporterUtils.getTags(articleDoc, tagsCssSelector, title);

        if (tagSet.size() > 0)
            tags = CollectionTools.join(tagSet, ",");

        //get CATEGORY - client specific
        if (StringUtils.isNotBlank(categoryCssSelector)) {
            Element categoryElement = articleDoc.select(categoryCssSelector).first();
            if (categoryElement != null && categoryElement.attr("content") != null) {
                category = categoryElement.attr("content");
                if (StringUtils.isNotBlank(category))
                    category = category.toUpperCase();
            }
        } else if (StringUtils.isNotBlank(categoryClassPrefix)) {
            // No selector configured: delegate to a client specific extractor loaded by name.
            String className = "io.seldon.importer.articles.category." + categoryClassPrefix
                    + "CategoryExtractor";
            Class<?> clazz = Class.forName(className);
            Constructor<?> ctor = clazz.getConstructor();
            CategoryExtractor extractor = (CategoryExtractor) ctor.newInstance();
            category = extractor.getCategory(url, articleDoc);
        }

        //get Sub CATEGORY - client specific
        if (StringUtils.isNotBlank(subCategoryCssSelector)) {
            Element subCategoryElement = articleDoc.select(subCategoryCssSelector).first();
            if (subCategoryElement != null && subCategoryElement.attr("content") != null) {
                subCategory = subCategoryElement.attr("content");
                // Normalise the sub category's own value (not the category's).
                if (StringUtils.isNotBlank(subCategory))
                    subCategory = subCategory.toUpperCase();
            }
        } else if (StringUtils.isNotBlank(subCategoryClassPrefix)) {
            String className = "io.seldon.importer.articles.category." + subCategoryClassPrefix
                    + "SubCategoryExtractor";
            Class<?> clazz = Class.forName(className);
            Constructor<?> ctor = clazz.getConstructor();
            CategoryExtractor extractor = (CategoryExtractor) ctor.newInstance();
            subCategory = extractor.getCategory(url, articleDoc);
        }

        // Get domain
        if (domainIsNeeded) {
            domain = getDomain(url);
        }

        // The title is always required; image, category and domain only when configured as needed.
        if ((StringUtils.isNotBlank(title) && (imageNotNeeded || StringUtils.isNotBlank(img_url))
                && (categoryNotNeeded || StringUtils.isNotBlank(category))
                && (!domainIsNeeded || StringUtils.isNotBlank(domain)))) {
            attributes = new HashMap<String, String>();
            attributes.put(TITLE, title);
            if (StringUtils.isNotBlank(category))
                attributes.put(CATEGORY, category);
            if (StringUtils.isNotBlank(subCategory))
                attributes.put(SUBCATEGORY, subCategory);
            if (StringUtils.isNotBlank(link))
                attributes.put(LINK, link);
            if (StringUtils.isNotBlank(leadtext))
                attributes.put(LEAD_TEXT, leadtext);
            if (StringUtils.isNotBlank(img_url))
                attributes.put(IMG_URL, img_url);
            if (StringUtils.isNotBlank(tags))
                attributes.put(TAGS, tags);
            attributes.put(CONTENT_TYPE, VERIFIED_CONTENT_TYPE);
            if (StringUtils.isNotBlank(description))
                attributes.put(DESCRIPTION, description);
            if (StringUtils.isNotBlank(publishDate))
                attributes.put(PUBLISH_DATE, publishDate);
            if (StringUtils.isNotBlank(domain))
                attributes.put(DOMAIN, domain);
            System.out.println("Item: " + url + "; Category: " + category + " SubCategory: " + subCategory);
            itemProcessResult.extraction_status = "EXTRACTION_SUCCEEDED";
        } else {
            logger.warn("Failed to get needed attributes for article " + url);
            logger.warn("[title=" + title + ", img_url=" + img_url + ", category=" + category + ", domain="
                    + domain + "]");
        }

        { // check for failures for the log result: build a comma separated list of missing attributes
            if (StringUtils.isBlank(title)) {
                itemProcessResult.attrib_failure_list = itemProcessResult.attrib_failure_list
                        + ((StringUtils.isBlank(itemProcessResult.attrib_failure_list)) ? "" : ",") + "title";
            }
            if (!imageNotNeeded && StringUtils.isBlank(img_url)) {
                itemProcessResult.attrib_failure_list = itemProcessResult.attrib_failure_list
                        + ((StringUtils.isBlank(itemProcessResult.attrib_failure_list)) ? "" : ",") + "img_url";
            }
            if (!categoryNotNeeded && StringUtils.isBlank(category)) {
                itemProcessResult.attrib_failure_list = itemProcessResult.attrib_failure_list
                        + ((StringUtils.isBlank(itemProcessResult.attrib_failure_list)) ? "" : ",")
                        + "category";
            }
        }
    } catch (Exception e) {
        // Best effort: any failure is logged and reported through the result, never rethrown.
        logger.error("Article: " + url + ". Attributes import FAILED", e);
        itemProcessResult.error = e.toString();
    }

    AttributesImporterUtils.logResult(logger, itemProcessResult);

    return attributes;
}

From source file:com.neophob.sematrix.core.properties.ApplicationConfigurationHelper.java

/**
 * Gets the color format from cfg./* w w w.j a  v a  2s  .  co  m*/
 *
 * @return the color format from cfg
 */
private int getColorFormatFromCfg() {
    colorFormat = new ArrayList<ColorFormat>();
    String rawConfig = config.getProperty(ConfigConstant.CFG_PANEL_COLOR_ORDER);

    if (StringUtils.isNotBlank(rawConfig)) {
        for (String s : rawConfig.split(ConfigConstant.DELIM)) {
            try {
                ColorFormat cf = ColorFormat.valueOf(StringUtils.strip(s));
                colorFormat.add(cf);
            } catch (Exception e) {
                LOG.log(Level.WARNING, FAILED_TO_PARSE, s);
            }
        }
    }

    return colorFormat.size();
}

From source file:com.neophob.sematrix.core.properties.ApplicationConfigurationHelper.java

/**
 * /*from  w  ww.j  a  va  2  s. com*/
 * @return
 */
private int getPanelOrderFromCfg(int totalDevices) {
    panelOrder = new LinkedList<Integer>();
    String rawConfig = config.getProperty(ConfigConstant.PIXELINVADERS_PANEL_ORDER);

    if (StringUtils.isNotBlank(rawConfig)) {
        for (String s : rawConfig.split(ConfigConstant.DELIM)) {
            try {
                Integer order = Integer.parseInt(StringUtils.strip(s));

                //sanity check
                if (order >= totalDevices) {
                    LOG.log(Level.WARNING,
                            ConfigConstant.PIXELINVADERS_PANEL_ORDER + ": Error parsing, " + "order value "
                                    + order + " >= total panels " + totalDevices + ". Settings igored!");
                    panelOrder.clear();
                    return 0;
                }
                panelOrder.add(order);
            } catch (Exception e) {
                LOG.log(Level.WARNING, FAILED_TO_PARSE, ConfigConstant.PIXELINVADERS_PANEL_ORDER);
            }
        }
    }

    return panelOrder.size();
}

From source file:com.neophob.sematrix.core.properties.ApplicationConfigurationHelper.java

/**
 * Rebuilds the {@code i2cAddr} list from the two Rainbowduino V2 row properties.
 *
 * <p>Each property value is split on {@code ConfigConstant.DELIM}; every entry is
 * stripped and converted with {@link Integer#decode(String)}. The matching
 * per-row device counter is incremented for each entry. A non-blank first row
 * additionally sets the device resolution to 8x8.
 *
 * @return the total number of addresses parsed from both rows
 */
private int parseI2cAddress() {
    i2cAddr = new ArrayList<Integer>();

    final String firstRow = config.getProperty(ConfigConstant.RAINBOWDUINO_V2_ROW1);
    if (StringUtils.isNotBlank(firstRow)) {
        this.deviceXResolution = 8;
        this.deviceYResolution = 8;

        final String[] firstRowEntries = firstRow.split(ConfigConstant.DELIM);
        for (int i = 0; i < firstRowEntries.length; i++) {
            final Integer address = Integer.decode(StringUtils.strip(firstRowEntries[i]));
            i2cAddr.add(address);
            devicesInRow1++;
        }
    }

    final String secondRow = config.getProperty(ConfigConstant.RAINBOWDUINO_V2_ROW2);
    if (StringUtils.isNotBlank(secondRow)) {
        final String[] secondRowEntries = secondRow.split(ConfigConstant.DELIM);
        for (int i = 0; i < secondRowEntries.length; i++) {
            final Integer address = Integer.decode(StringUtils.strip(secondRowEntries[i]));
            i2cAddr.add(address);
            devicesInRow2++;
        }
    }

    return i2cAddr.size();
}

From source file:com.neophob.sematrix.core.properties.ApplicationConfigurationHelper.java

/**
 * Rebuilds the {@code rainbowduinoV3SerialDevices} list from the two
 * Rainbowduino V3 row properties.
 *
 * <p>Each property value is split on {@code ConfigConstant.DELIM} and every entry
 * is added after stripping surrounding whitespace; the matching per-row device
 * counter is incremented for each entry. A non-blank first row additionally sets
 * the device resolution to 8x8.
 *
 * @return the total number of serial device entries collected from both rows
 */
private int parseRainbowduinoV3Config() {
    this.rainbowduinoV3SerialDevices = new ArrayList<String>();

    final String firstRow = this.config.getProperty(ConfigConstant.RAINBOWDUINO_V3_ROW1);
    if (StringUtils.isNotBlank(firstRow)) {
        this.deviceXResolution = 8;
        this.deviceYResolution = 8;
        final String[] firstRowEntries = firstRow.split(ConfigConstant.DELIM);
        for (int i = 0; i < firstRowEntries.length; i++) {
            final String device = StringUtils.strip(firstRowEntries[i]);
            this.rainbowduinoV3SerialDevices.add(device);
            this.devicesInRow1++;
        }
    }

    final String secondRow = this.config.getProperty(ConfigConstant.RAINBOWDUINO_V3_ROW2);
    if (StringUtils.isNotBlank(secondRow)) {
        final String[] secondRowEntries = secondRow.split(ConfigConstant.DELIM);
        for (int i = 0; i < secondRowEntries.length; i++) {
            final String device = StringUtils.strip(secondRowEntries[i]);
            this.rainbowduinoV3SerialDevices.add(device);
            this.devicesInRow2++;
        }
    }

    return this.rainbowduinoV3SerialDevices.size();
}

From source file:com.neophob.sematrix.core.properties.ApplicationConfigurationHelper.java

/**
 * Parses the art net devices./*from   w  w  w.j av a2  s .c  om*/
 *
 * @return the int
 */
private int parseArtNetDevices() {
    artNetDevice = new ArrayList<DeviceConfig>();

    //minimal ip length 1.1.1.1
    if (StringUtils.length(getArtNetIp()) > 6 && parseOutputXResolution() > 0 && parseOutputYResolution() > 0) {
        this.deviceXResolution = parseOutputXResolution();
        this.deviceYResolution = parseOutputYResolution();

        String value = config.getProperty(ConfigConstant.ARTNET_ROW1);
        if (StringUtils.isNotBlank(value)) {

            devicesInRow1 = 0;
            devicesInRow2 = 0;

            for (String s : value.split(ConfigConstant.DELIM)) {
                try {
                    DeviceConfig cfg = DeviceConfig.valueOf(StringUtils.strip(s));
                    artNetDevice.add(cfg);
                    devicesInRow1++;
                } catch (Exception e) {
                    LOG.log(Level.WARNING, FAILED_TO_PARSE, s);
                }
            }
        }

        value = config.getProperty(ConfigConstant.ARTNET_ROW2);
        if (StringUtils.isNotBlank(value)) {
            for (String s : value.split(ConfigConstant.DELIM)) {
                try {
                    DeviceConfig cfg = DeviceConfig.valueOf(StringUtils.strip(s));
                    artNetDevice.add(cfg);
                    devicesInRow2++;
                } catch (Exception e) {
                    LOG.log(Level.WARNING, FAILED_TO_PARSE, s);
                }
            }
        }
    }

    return artNetDevice.size();
}

From source file:com.neophob.sematrix.core.properties.ApplicationConfigurationHelper.java

/**
  * Parses the e131 devices./*from w ww .  j  av a2 s .c o  m*/
  *
  * @return the int
  */
private int parseE131Devices() {
    e131Device = new ArrayList<DeviceConfig>();

    if (StringUtils.length(getE131Ip()) > 6 && parseOutputXResolution() > 0 && parseOutputYResolution() > 0) {

        this.deviceXResolution = parseOutputXResolution();
        this.deviceYResolution = parseOutputYResolution();

        String value = config.getProperty(ConfigConstant.E131_ROW1);
        if (StringUtils.isNotBlank(value)) {

            devicesInRow1 = 0;
            devicesInRow2 = 0;

            for (String s : value.split(ConfigConstant.DELIM)) {
                try {
                    DeviceConfig cfg = DeviceConfig.valueOf(StringUtils.strip(s));
                    e131Device.add(cfg);
                    devicesInRow1++;
                } catch (Exception e) {
                    LOG.log(Level.WARNING, FAILED_TO_PARSE, s);
                }
            }
        }

        value = config.getProperty(ConfigConstant.E131_ROW2);
        if (StringUtils.isNotBlank(value)) {
            for (String s : value.split(ConfigConstant.DELIM)) {
                try {
                    DeviceConfig cfg = DeviceConfig.valueOf(StringUtils.strip(s));
                    e131Device.add(cfg);
                    devicesInRow2++;
                } catch (Exception e) {
                    LOG.log(Level.WARNING, FAILED_TO_PARSE, s);
                }
            }
        }
    }

    return e131Device.size();
}

From source file:com.neophob.sematrix.core.properties.ApplicationConfigurationHelper.java

/**
 * Parses tpm2net devices/*  w  ww  .j  av  a  2  s.  c  om*/
 * @return
 */
private int parseTpm2NetDevices() {
    tpm2netDevice = new ArrayList<DeviceConfig>();

    if (StringUtils.isNotBlank(getTpm2NetIpAddress()) && parseOutputXResolution() > 0
            && parseOutputYResolution() > 0) {
        this.deviceXResolution = parseOutputXResolution();
        this.deviceYResolution = parseOutputYResolution();

        String value = config.getProperty(ConfigConstant.TPM2NET_ROW1);
        if (StringUtils.isNotBlank(value)) {

            devicesInRow1 = 0;
            devicesInRow2 = 0;

            for (String s : value.split(ConfigConstant.DELIM)) {
                try {
                    DeviceConfig cfg = DeviceConfig.valueOf(StringUtils.strip(s));
                    tpm2netDevice.add(cfg);
                    devicesInRow1++;
                } catch (Exception e) {
                    LOG.log(Level.WARNING, FAILED_TO_PARSE, s);
                }
            }
        }

        value = config.getProperty(ConfigConstant.TPM2NET_ROW2);
        if (StringUtils.isNotBlank(value)) {
            for (String s : value.split(ConfigConstant.DELIM)) {
                try {
                    DeviceConfig cfg = DeviceConfig.valueOf(StringUtils.strip(s));
                    tpm2netDevice.add(cfg);
                    devicesInRow2++;
                } catch (Exception e) {
                    LOG.log(Level.WARNING, FAILED_TO_PARSE, s);
                }
            }
        }
    }
    return tpm2netDevice.size();
}

From source file:org.dbgl.util.searchengine.WebSearchEngine.java

/**
 * Converts a chunk of HTML into plain text.
 *
 * <p>The three line-break tag variants are replaced by a newline, {@code &nbsp;}
 * by a space and {@code &apos;} by an apostrophe. The result is then stripped of
 * surrounding whitespace and passed through
 * {@code StringEscapeUtils.unescapeHtml4}.
 *
 * @param htmlChunk the HTML fragment to convert
 * @return the stripped, unescaped text
 */
protected static String unescapeHtml(final String htmlChunk) {
    // Line breaks first, in the same order as before: unclosed, closed, alternative closed.
    final String withLineBreaks = replaceTag(HTML_BR_CLOSED_ALT, "\n",
            replaceTag(HTML_BR_CLOSED, "\n", replaceTag(HTML_BR_UNCLOSED, "\n", htmlChunk)));
    final String withPlainEntities = replaceTag("&apos;", "'", replaceTag("&nbsp;", " ", withLineBreaks));
    final String stripped = StringUtils.strip(withPlainEntities);
    return StringEscapeUtils.unescapeHtml4(stripped);
}

From source file:org.efaps.cli.EQLHandler.java

/**
 * Gets the stmt./*from  w  w w  .java2 s . c o m*/
 *
 * @return the stmt
 * @throws IOException Signals that an I/O exception has occurred.
 */
protected String getStmt() throws IOException {
    final StringBuilder eql = EQLObserver.get().getEql();
    while (!StringUtils.endsWithAny(eql, ";", "; ", ";  ", ";   ")) {
        eql.append(this.input.in().withPromt("\\").readLine());
    }
    return StringUtils.removeEnd(StringUtils.strip(eql.toString()), ";");
}