org.talend.dataquality.converters.StringTrimmer.java Source code

Java tutorial

Introduction

Here is the source code for org.talend.dataquality.converters.StringTrimmer.java

Source

// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dataquality.converters;

import org.apache.commons.lang.StringUtils;

/**
 * this class is used to remove trailing and leading characters.<br/>
 * created by msjian on 2017.2.16
 * 
 */
public class StringTrimmer {

    public String[] WHITESPACE_CHARS = new String[] { "\t" // CHARACTER TABULATION //$NON-NLS-1$
            , "\n" // LINE FEED (LF) //$NON-NLS-1$
            , '\u000B' + "" // LINE TABULATION //$NON-NLS-1$
            , "\f" // FORM FEED (FF) //$NON-NLS-1$
            , "\r" // CARRIAGE RETURN (CR) //$NON-NLS-1$
            , " " // SPACE //$NON-NLS-1$
            , '\u0085' + "" // NEXT LINE (NEL) //$NON-NLS-1$
            , '\u00A0' + "" // NO-BREAK SPACE //$NON-NLS-1$
            , '\u1680' + "" // OGHAM SPACE MARK //$NON-NLS-1$
            , '\u180E' + "" // MONGOLIAN VOWEL SEPARATOR //$NON-NLS-1$
            , '\u2000' + "" // EN QUAD //$NON-NLS-1$
            , '\u2001' + "" // EM QUAD //$NON-NLS-1$
            , '\u2002' + "" // EN SPACE //$NON-NLS-1$
            , '\u2003' + "" // EM SPACE //$NON-NLS-1$
            , '\u2004' + "" // THREE-PER-EM SPACE //$NON-NLS-1$
            , '\u2005' + "" // FOUR-PER-EM SPACE //$NON-NLS-1$
            , '\u2006' + "" // SIX-PER-EM SPACE //$NON-NLS-1$
            , '\u2007' + "" // FIGURE SPACE //$NON-NLS-1$
            , '\u2008' + "" // PUNCTUATION SPACE //$NON-NLS-1$
            , '\u2009' + "" // THIN SPACE //$NON-NLS-1$
            , '\u200A' + "" // HAIR SPACE //$NON-NLS-1$
            , '\u2028' + "" // LINE SEPARATOR //$NON-NLS-1$
            , '\u2029' + "" // PARAGRAPH SEPARATOR //$NON-NLS-1$
            , '\u202F' + "" // NARROW NO-BREAK SPACE //$NON-NLS-1$
            , '\u205F' + "" // MEDIUM MATHEMATICAL SPACE //$NON-NLS-1$
            , '\u3000' + "" // IDEOGRAPHIC SPACE //$NON-NLS-1$
    };

    /**
     * Remove trailing and leading characters which may be empty string, space string,\t,\n,\r,\f...any space, break related
     * characters.
     * 
     * @param inputStr - the input text.
     * @return String
     */
    public String removeTrailingAndLeadingWhitespaces(String inputStr) {
        if (StringUtils.isEmpty(inputStr)) {
            return inputStr;
        }

        String result = inputStr;
        while (StringUtils.startsWithAny(result, WHITESPACE_CHARS)) {
            result = StringUtils.removeStart(result, result.substring(0, 1));
        }

        while (StringUtils.endsWithAny(result, WHITESPACE_CHARS)) {
            result = StringUtils.removeEnd(result, result.substring(result.length() - 1, result.length()));
        }
        return result;
    }

    /**
     * Remove trailing and leading characters and the remove Character is whitespace only. <br/>
     * <br/>
     * Note: this is not equals inputStr.trim() for example: <br/>
     * when the inputStr is ("\t" + "abc "), this method will return ("\t" + "abc"),<br/>
     * but for trim method will return "abc"
     * 
     * @param inputStr - the input text.
     * @return String
     */
    public String removeTrailingAndLeading(String inputStr) {
        return removeTrailingAndLeading(inputStr, " "); //$NON-NLS-1$
    }

    /**
     * Remove trailing and leading characters.
     * 
     * @param inputStr - the input text.
     * @param removeCharacter - the remove character.
     * @return String.
     */
    public String removeTrailingAndLeading(String inputStr, String removeCharacter) {
        if (StringUtils.isEmpty(inputStr) || StringUtils.isEmpty(removeCharacter)) {
            return inputStr;
        }

        String result = inputStr;

        while (result.startsWith(removeCharacter)) {
            result = StringUtils.removeStart(result, removeCharacter);
        }
        while (result.endsWith(removeCharacter)) {
            result = StringUtils.removeEnd(result, removeCharacter);
        }
        return result;
    }

}