// Java tutorial
/*L
 *  Copyright SAIC
 *  Copyright SAIC-Frederick
 *
 *  Distributed under the OSI-approved BSD 3-Clause License.
 *  See http://ncip.github.com/cananolab/LICENSE.txt for details.
 */

package gov.nih.nci.cananolab.util;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringEscapeUtils;
import org.apache.log4j.Logger;

/**
 * This class contains a set of utilities for converting Strings to other
 * formats or converting other formats to String.
 *
 * All methods are static and keep no mutable state.
 *
 * @author pansu
 *
 */
public class StringUtils {
    private static final Logger logger = Logger.getLogger(StringUtils.class);

    /** A phrase wrapped in single or double quotes; group 1 is the text between the quotes. */
    private static final Pattern QUOTED_PHRASE = Pattern.compile("\\B[\"']([^\"']*)[\"']\\B");

    /** A single line break: CRLF first so that it is not counted as two breaks. */
    private static final Pattern LINE_BREAK = Pattern.compile("\\r\\n|\\r|\\n");

    /** One asterisk at the very start of the input. */
    private static final Pattern LEADING_WILDCARD = Pattern.compile("^(\\*)(.*)");

    /** One asterisk at the very end of the input. */
    private static final Pattern TRAILING_WILDCARD = Pattern.compile("(.*)(\\*)$");

    /** Matches only inputs that contain none of the black-listed fragments; built once. */
    private static final Pattern XSS_SAFE_INPUT = buildXssSafePattern();

    /**
     * Tell whether a file name ends with one of the extensions listed in
     * {@code Constants.IMAGE_FILE_EXTENSIONS}.
     *
     * The file name is upper-cased (locale-independently) before the
     * comparison; the configured extensions are compared as they are.
     * NOTE(review): this presumes the configured extensions are upper case -
     * confirm against Constants.
     *
     * @param fileName
     *            the file name to test, may be null
     * @return true when the name ends with "." followed by a configured
     *         extension, false otherwise or when the name is null/blank
     */
    public static boolean isImgFileExt(String fileName) {
        if (isEmpty(fileName)) {
            return false;
        }
        // Upper-case once instead of once per extension; Locale.ROOT keeps
        // "i" -> "I" regardless of the JVM default locale.
        String upperName = fileName.toUpperCase(Locale.ROOT);
        for (int i = 0; i < Constants.IMAGE_FILE_EXTENSIONS.length; i++) {
            if (upperName.endsWith("." + Constants.IMAGE_FILE_EXTENSIONS[i])) {
                return true;
            }
        }
        return false;
    }

    /**
     * Join the non-empty items of an array with a delimiter.
     *
     * @param stringArray
     *            items to join, may be null
     * @param delimiter
     *            text placed between two consecutive items
     * @return the joined text, or "" when the array is null or has no items
     */
    public static String join(String[] stringArray, String delimiter) {
        if (stringArray == null || stringArray.length == 0) {
            return "";
        }
        return join(Arrays.asList(stringArray), delimiter);
    }

    /**
     * Join the non-empty items of a collection with a delimiter. Null and
     * blank items are skipped. The collection passed in is left untouched
     * (earlier versions removed the empty items from the caller's collection,
     * which also failed on fixed-size or unmodifiable collections).
     *
     * @param stringList
     *            items to join, may be null
     * @param delimiter
     *            text placed between two consecutive items
     * @return the joined text, or "" when there is nothing to join
     */
    public static String join(Collection<String> stringList, String delimiter) {
        if (stringList == null || stringList.isEmpty()) {
            return "";
        }
        StringBuilder sb = new StringBuilder();
        boolean first = true;
        for (String str : stringList) {
            if (isEmpty(str)) {
                continue;
            }
            if (!first) {
                sb.append(delimiter);
            }
            sb.append(str);
            first = false;
        }
        return sb.toString();
    }

    /**
     * Join the non-empty items of a collection in the order defined by
     * {@code SortableName}. Because the names are collected in a sorted set,
     * names that compare as equal appear once.
     *
     * The delimiter is written only between two names that are actually
     * emitted, so skipped empty names no longer leave stray delimiters.
     *
     * @param strings
     *            items to join, may be null
     * @param delimiter
     *            text placed between two consecutive names
     * @return the joined text, or "" when there is nothing to join
     */
    public static String sortJoin(Collection<String> strings, String delimiter) {
        if (strings == null || strings.isEmpty()) {
            return "";
        }
        SortedSet<SortableName> sortableNames = new TreeSet<SortableName>();
        for (String str : strings) {
            if (!isEmpty(str)) {
                sortableNames.add(new SortableName(str));
            }
        }
        StringBuilder sb = new StringBuilder();
        for (SortableName sortableName : sortableNames) {
            String name = sortableName.getName();
            if (isEmpty(name)) {
                continue;
            }
            // names are non-empty, so a non-empty builder means "not the first"
            if (sb.length() > 0) {
                sb.append(delimiter);
            }
            sb.append(name);
        }
        return sb.toString();
    }

    /**
     * Join every item of a collection with a delimiter. Empty strings are
     * included in the joined string and null items are converted to empty
     * strings.
     *
     * @param stringCollection
     *            items to join, may be null
     * @param delimiter
     *            text placed between two consecutive items
     * @return the joined text, or "" when the collection is null or empty
     */
    public static String joinEmptyItemIncluded(Collection<String> stringCollection, String delimiter) {
        if (stringCollection == null || stringCollection.isEmpty()) {
            return "";
        }
        StringBuilder buffer = new StringBuilder();
        Iterator<String> iter = stringCollection.iterator();
        while (iter.hasNext()) {
            String item = iter.next();
            buffer.append(item == null ? "" : item);
            if (iter.hasNext()) {
                buffer.append(delimiter);
            }
        }
        return buffer.toString();
    }

    /**
     * Escape XML special characters but keep line breaks, useful in preserving
     * line breaks in descriptions. The text is trimmed, split into lines, each
     * line is trimmed and escaped, and the lines are re-joined with
     * {@code <br>}.
     *
     * CRLF, lone CR and lone LF are all treated as line breaks (earlier
     * versions recognised CRLF only, so Unix-style text lost its breaks).
     *
     * @param text
     *            the raw text, may be null
     * @return the escaped text, or "" when the input is null or blank
     */
    public static String escapeXmlButPreserveLineBreaks(String text) {
        if (isEmpty(text)) {
            return "";
        }
        String[] lines = LINE_BREAK.split(text.trim());
        StringBuilder newText = new StringBuilder();
        for (int i = 0; i < lines.length; i++) {
            if (i > 0) {
                newText.append("<br>");
            }
            newText.append(StringEscapeUtils.escapeXml(lines[i].trim()));
        }
        return newText.toString();
    }

    /**
     * Parse a string into a Float.
     *
     * @param floatStr
     *            the text to parse, may be null
     * @return the parsed number, or null when the input is null or blank
     * @throws RuntimeException
     *             when the text is not a valid float; the original
     *             NumberFormatException is attached as the cause
     */
    public static Float convertToFloat(String floatStr) {
        if (isEmpty(floatStr)) {
            return null;
        }
        try {
            return Float.parseFloat(floatStr);
        } catch (NumberFormatException e) {
            logger.error("Error converting the given string to a float number", e);
            throw new RuntimeException("Can't convert the given string to a float number: " + floatStr, e);
        }
    }

    /**
     * Parse a string into a Long. Surrounding whitespace is ignored, as it is
     * for {@link #convertToFloat(String)}.
     *
     * @param longStr
     *            the text to parse, may be null
     * @return the parsed number, or null when the input is null or blank
     * @throws RuntimeException
     *             when the text is not a valid long; the original
     *             NumberFormatException is attached as the cause
     */
    public static Long convertToLong(String longStr) {
        if (isEmpty(longStr)) {
            return null;
        }
        try {
            // Long.parseLong, unlike Float.parseFloat, does not trim by itself
            return Long.parseLong(longStr.trim());
        } catch (NumberFormatException e) {
            logger.error("Error converting the given string to a long number", e);
            throw new RuntimeException("Can't convert the given string to a long number: " + longStr, e);
        }
    }

    /**
     * Null-safe {@code toString()}.
     *
     * @param obj
     *            any object, may be null
     * @return {@code obj.toString()}, or "" when obj is null
     */
    public static String convertToString(Object obj) {
        return obj == null ? "" : obj.toString();
    }

    /**
     * Tell whether a string consists of digits only. Signs, whitespace and
     * separators make the test fail; any character accepted by
     * {@link Character#isDigit(char)} counts as a digit.
     *
     * @param theStr
     *            the text to test, may be null
     * @return true when every character is a digit, false otherwise or when
     *         the input is null/blank
     */
    public static boolean isInteger(String theStr) {
        if (isEmpty(theStr)) {
            return false;
        }
        for (int i = 0; i < theStr.length(); i++) {
            if (!Character.isDigit(theStr.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tell whether a string is an unsigned decimal number written with
     * exactly one decimal point, e.g. "1.5", "1." or ".5". A string without a
     * decimal point is not considered a double by this method, and a lone "."
     * is rejected because it contains no digit.
     *
     * @param theStr
     *            the text to test, may be null
     * @return true when the text is digits with exactly one '.', false
     *         otherwise or when the input is null/blank
     */
    public static boolean isDouble(String theStr) {
        if (isEmpty(theStr)) {
            return false;
        }
        int decimalCount = 0;
        int digitCount = 0;
        for (int i = 0; i < theStr.length(); i++) {
            char c = theStr.charAt(i);
            if (Character.isDigit(c)) {
                digitCount++;
            } else if (c == '.') {
                decimalCount++;
            } else {
                return false;
            }
        }
        return decimalCount == 1 && digitCount > 0;
    }

    /**
     * Tell whether an array contains a given string.
     *
     * @param array
     *            the array to search, may be null
     * @param aString
     *            the string to look for, may be null
     * @param ignoreCase
     *            true to compare ignoring case
     * @return true when a non-null element equals the string; false when
     *         nothing matches or when the array or the string is null
     */
    public static boolean contains(String[] array, String aString, boolean ignoreCase) {
        if (array == null || aString == null) {
            return false;
        }
        for (String element : array) {
            if (element == null) {
                continue;
            }
            boolean same = ignoreCase ? element.equalsIgnoreCase(aString) : element.equals(aString);
            if (same) {
                return true;
            }
        }
        return false;
    }

    /**
     * Return a new array made of the given array followed by one more string.
     * The input array is not modified.
     *
     * @param x
     *            the original array; null is treated as an empty array
     * @param aString
     *            the string to append, may be null
     * @return a new array one element longer than the input
     */
    public static String[] add(String[] x, String aString) {
        if (x == null) {
            return new String[] { aString };
        }
        String[] result = Arrays.copyOf(x, x.length + 1);
        result[x.length] = aString;
        return result;
    }

    /**
     * Convert a string with multiple words separated by space to one word, with
     * first letter as lower case.
     *
     * @param words
     *            the words to merge, may be null
     * @return the merged word; a null or blank input is returned unchanged
     */
    public static String getOneWordLowerCaseFirstLetter(String words) {
        if (isEmpty(words)) {
            return words;
        }
        // Re-case by position rather than with replaceFirst: the first
        // character would otherwise be interpreted as a regular expression.
        String recased = words.substring(0, 1).toLowerCase(Locale.ROOT) + words.substring(1);
        return recased.replace(" ", "");
    }

    /**
     * Convert a string with multiple words separated by space to one word, with
     * first letter as upper case.
     *
     * @param words
     *            the words to merge, may be null
     * @return the merged word; a null or blank input is returned unchanged
     */
    public static String getOneWordUpperCaseFirstLetter(String words) {
        if (isEmpty(words)) {
            return words;
        }
        // Re-case by position rather than with replaceFirst: the first
        // character would otherwise be interpreted as a regular expression.
        String recased = words.substring(0, 1).toUpperCase(Locale.ROOT) + words.substring(1);
        return recased.replace(" ", "");
    }

    /**
     * Convert to upper case the first letter of each word in the input. Words
     * are separated by single space characters; spaces between words are
     * preserved, leading and trailing whitespace is trimmed.
     *
     * @param strWithWords
     *            the text to convert, may be null
     * @return the converted text, or null when the input is null
     */
    public static String getCamelCaseFormatInWords(String strWithWords) {
        if (strWithWords == null) {
            return strWithWords;
        }
        String[] words = strWithWords.split(" ");
        StringBuilder sb = new StringBuilder();
        for (String word : words) {
            // consecutive or leading spaces produce empty "words"; keep their
            // space but do not try to read a first character from them
            if (word.length() > 0) {
                sb.append(Character.toUpperCase(word.charAt(0))).append(word.substring(1));
            }
            sb.append(" ");
        }
        return sb.toString().trim();
    }

    /**
     * Parse the text into a list of words using white space as delimiter.
     * Keeping words in quotes together. The result holds each distinct word
     * once, in natural String order; empty words are left out.
     *
     * @param text
     *            the text to parse, may be null
     * @return the sorted distinct words, or null when the text is null or
     *         blank
     */
    public static List<String> parseToWords(String text) {
        if (isEmpty(text)) {
            return null;
        }
        SortedSet<String> wordList = new TreeSet<String>();
        // everything outside quoted phrases is split on white space
        for (String unquoted : QUOTED_PHRASE.split(text)) {
            for (String word : unquoted.split("\\s")) {
                if (word.length() > 0) {
                    wordList.add(word);
                }
            }
        }
        // Quoted phrases are kept whole. find() resumes after the previous
        // match, so a closing quote is never reused as an opening quote.
        Matcher matcher = QUOTED_PHRASE.matcher(text);
        while (matcher.find()) {
            String phrase = matcher.group(1).trim();
            if (phrase.length() > 0) {
                wordList.add(phrase);
            }
        }
        return new ArrayList<String>(wordList);
    }

    /**
     * Split the text on a delimiter and return the trimmed, non-blank pieces
     * in their original order.
     *
     * @param text
     *            the text to parse, may be null
     * @param delimiter
     *            the separator, interpreted as a regular expression by
     *            {@link String#split(String)}
     * @return the pieces, or null when the text is null or blank
     */
    public static List<String> parseToWords(String text, String delimiter) {
        if (isEmpty(text)) {
            return null;
        }
        List<String> wordList = new ArrayList<String>();
        for (String word : text.trim().split(delimiter)) {
            if (!isEmpty(word)) {
                wordList.add(word.trim());
            }
        }
        return wordList;
    }

    /**
     * Tell whether a collection contains a string, ignoring case and
     * surrounding whitespace on both sides of the comparison.
     *
     * @param collection
     *            the strings to search, may be null
     * @param match
     *            the string to look for, may be null
     * @return true when a non-null item matches; false when nothing matches or
     *         when the collection or the string is null
     */
    public static Boolean containsIgnoreCase(Collection<String> collection, String match) {
        if (collection == null || match == null) {
            return Boolean.FALSE;
        }
        String target = match.trim();
        for (String str : collection) {
            if (str != null && str.trim().equalsIgnoreCase(target)) {
                return Boolean.TRUE;
            }
        }
        return Boolean.FALSE;
    }

    /**
     * Trim the search string and strip one star from either end of it, e.g.
     * "*NCL-100**" becomes "NCL-100*".
     *
     * @param searchString
     *            the search text, may be null
     * @return the stripped text, or null when the input is null
     */
    public static String stripWildcards(String searchString) {
        if (searchString == null) {
            return null;
        }
        String newString = searchString.trim();
        newString = LEADING_WILDCARD.matcher(newString).replaceAll("$2");
        newString = TRAILING_WILDCARD.matcher(newString).replaceAll("$1");
        return newString;
    }

    /**
     * Check an input string against a black list of fragments commonly seen in
     * cross-site-scripting, phishing and probing attempts: script tags,
     * javascript: URLs, frames, and assorted encoded variants of those.
     *
     * The comparison ignores (ASCII) case, so mixed-case spellings of a
     * black-listed fragment are rejected as well.
     *
     * NOTE(review): the check is anchored on a single line; an input that
     * contains a line terminator never matches and is therefore always
     * rejected. That fail-closed behaviour is unchanged - confirm whether
     * multi-line input should be accepted when it is clean.
     *
     * @param inputString
     *            the text to validate, may be null
     * @return true when the input contains none of the black-listed fragments
     *         (a null input contains none), false otherwise
     */
    public static Boolean xssValidate(String inputString) {
        if (inputString == null) {
            return Boolean.TRUE;
        }
        return Boolean.valueOf(XSS_SAFE_INPUT.matcher(inputString).matches());
    }

    /**
     * Build the pattern behind {@link #xssValidate(String)}: a negative
     * look-ahead over the alternation of every black-listed fragment. Each
     * entry below is a regular expression fragment.
     */
    private static Pattern buildXssSafePattern() {
        // The input string can't contain patterns like a script tag or a
        // javascript: URL, nor their HEX, unicode, HTML-entity or UTF-7
        // encoded representations.
        String[] scriptPatterns = new String[] {
                "\\<script",
                "\\<\\%00script",
                // hex encoded for URL for the script tag
                "\\%3C\\%73\\%63\\%72\\%69\\%70\\%74",
                // NOTE(review): the next three entries are labelled as the
                // hex-encoded, unicode and HTML-entity-encoded versions, yet
                // they are spelled as plain text here; the encoded literals
                // may have been lost - confirm against the original source.
                "<script",
                "<script",
                "<%00script",
                "\\%uff1cscript\\%uff1e",
                // hex version of the script tag
                "\\%BC\\%F3\\%E3\\%F2\\%E9\\%F0\\%F4",
                "\\+ADw\\-SCRIPT\\+AD4",
                // script tag written with a literal unicode escape sequence
                "\\\\u003Cscript" };
        String[] javascriptPatterns = new String[] {
                "javascript\\:",
                // hex version of javascript: (a stray '%' between the codes
                // for 'c' and 'r' used to make this entry unmatchable)
                "\\%6A\\%61\\%76\\%61\\%73\\%63\\%72\\%69\\%70\\%74\\%3A",
                // NOTE(review): labelled as the hex-encoded and
                // HTML-entity-encoded versions but spelled as plain text -
                // confirm against the original source.
                "javascript:",
                "javascript:" };
        String[] phishingPatterns = new String[] { "\\<iframe", "\\<frame" };
        String[] otherPatterns = new String[] { "etc/passwd", "/bin/id", "\\.ini", ";vol\\|", "id\\|",
                "AVAK\\$\\(RETURN_CODE\\)OS", "sys\\.dba_user", "\\+select\\+", "\\+and\\+", "WFXSSProbe",
                "WF_XSRF", "alert\\(", "TEXT/VBSCRIPT", "=\"", "\\.\\./", "\\.\\.\\\\", "\\\\\'", "\\\\\\\"",
                "background\\:", "\\'\\+", "\\\"\\+", "%\\d+" };
        String patternStr = join(scriptPatterns, "|") + "|" + join(javascriptPatterns, "|") + "|"
                + join(phishingPatterns, "|") + "|" + join(otherPatterns, "|");
        return Pattern.compile("^(?!.*(" + patternStr + ")).*$", Pattern.CASE_INSENSITIVE);
    }

    /**
     * Return true for Null or empty string, false otherwise. A string made of
     * whitespace only counts as empty.
     */
    public static boolean isEmpty(String str) {
        return str == null || str.trim().length() == 0;
    }

    /**
     * Small manual demonstration of word parsing, wildcard stripping and XSS
     * validation; results are printed to standard output.
     *
     * @param args
     *            not used
     */
    public static void main(String[] args) {
        try {
            String text = "this is 'a test' of \"parsing words\"";
            System.out.println(text);
            for (String word : StringUtils.parseToWords(text)) {
                System.out.println(word);
            }

            String text2 = "thomas\r\nshukla";
            System.out.println(text2);
            for (String word : StringUtils.parseToWords(text2, "\r\n")) {
                System.out.println(word);
            }

            String text3 = "*NCL-100**";
            System.out.println(stripWildcards(text3));

            String text6 = "alert(1234)";
            if (StringUtils.xssValidate(text6)) {
                System.out.println("pass xss validation");
            } else {
                System.out.println("didn't pass xss validation");
            }
        } catch (Exception e) {
            logger.error(e);
        }
    }

    /**
     * Return a copy of the array without the first occurrence of the given
     * string. The input array is not modified.
     *
     * @param oldArray
     *            the original array; null is treated as an empty array
     * @param stringToRemove
     *            the string to drop, may be null
     * @return a new array; identical in content to the input when the string
     *         does not occur in it
     */
    public static String[] removeFromArray(String[] oldArray, String stringToRemove) {
        if (oldArray == null) {
            return new String[0];
        }
        List<String> list = new ArrayList<String>(Arrays.asList(oldArray));
        list.remove(stringToRemove);
        return list.toArray(new String[list.size()]);
    }
}