Extracts words from text removing non alpha characters - Java java.lang

Java examples for java.lang:String UTF

Description

Extracts words from text, discarding any non-alphabetic characters.

Demo Code


//package com.java2s;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class Main {

    /**
     * Matches a maximal run of Unicode letters delimited by word boundaries.
     *
     * <p>{@link Pattern#UNICODE_CHARACTER_CLASS} makes {@code \b} use the Unicode
     * definition of a word character. Without it, newer JDKs treat {@code \b} as
     * ASCII-only, which disagrees with {@code \p{L}} and drops or truncates words
     * that begin or end with a non-ASCII letter. Compiled once because
     * {@code Pattern} instances are immutable and safe to share.
     */
    private static final Pattern WORD_PATTERN =
            Pattern.compile("\\b\\p{L}+\\b", Pattern.UNICODE_CHARACTER_CLASS);

    /**
     * Extracts words from text removing non alpha characters.
     *
     * <p>A word is a run of letters ({@code \p{L}}) with a word boundary on both
     * sides, so tokens where letters touch digits or underscores (for example
     * {@code abc123} or {@code foo_bar}) yield no word.
     *
     * @param text to extract words from; may be {@code null}
     * @return mutable list of found words in order of appearance, or an empty
     * list if none were found or the text is {@code null}, empty or blank
     */
    public static List<String> getWords(String text) {

        List<String> output = new ArrayList<>();
        if (isNullOrEmptyTrimmed(text)) {
            return output;
        }

        Matcher m = WORD_PATTERN.matcher(text);
        while (m.find()) {
            output.add(m.group());
        }

        return output;
    }

    /**
     * Checks if string is null, empty or contains only characters removed by
     * {@link String#trim()} (spaces and other characters up to U+0020).
     *
     * @param value string to test
     * @return <code>true</code> if <code>null</code>, empty or whitespace only,
     * <code>false</code> otherwise.
     */
    public static boolean isNullOrEmptyTrimmed(String value) {

        return (value == null || value.trim().length() == 0);
    }

    /**
     * Null resilient trim.
     *
     * @param value value to trim; may be <code>null</code>
     * @return <code>null</code> if the value is <code>null</code>, otherwise the
     * trimmed value (empty string is left empty)
     */
    public static String trim(String value) {

        return value != null ? value.trim() : null;
    }
}

Related Tutorials