Java tutorial
/******************************************************************************* * Copyright (c) 2009 David Harrison. * All rights reserved. This program and the accompanying materials * are made available under the terms of the GNU Public License v3.0 * which accompanies this distribution, and is available at * http://www.gnu.org/licenses/gpl-3.0.html * * Contributors: * David Harrison - initial API and implementation ******************************************************************************/ package com.sfs; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Collection; import java.util.Date; import java.util.Calendar; import java.util.Set; import java.util.HashSet; import java.util.TreeMap; import java.util.StringTokenizer; import org.apache.commons.lang.StringUtils; import org.clapper.util.html.HTMLUtil; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Node; import org.jsoup.safety.Whitelist; /** * The DataFilter class has a range of utility functions for parsing and * filtering incoming data. */ public class DataFilter { /** The Constant REPLACE_WHAT. */ private static final String[] REPLACE_WHAT = { "<P>", "</P>", "<BR>", "\n", "<STRONG>", "</STRONG>", "<EM>", "</EM>", "<U>", "</U>", "<OL>", "</OL>", "<UL>", "</UL>", "<LI>", "<LI>", "<SUP>", "</SUP>", "<SUB>", "</SUB>" }; /** The Constant REPLACE_TO. */ private static final String[] REPLACE_TO = { "%p%", "%/p%", "%br%", "%br%", "%b%", "%/b%", "%i%", "%/i%", "%u%", "%/u%", "%ol%", "%/ol%", "%ul%", "%/ul%", "%li%", "%/li%", "%sup%", "%/sup%", "%sub%", "%/sub%" }; /** The Constant ALLOWED_HTML. */ private static final String[] ALLOWED_HTML = { "<p>", "</p>", "<br/>", "<br/>", "<b>", "</b>", "<i>", "</i>", "<u>", "</u>", "<ol>", "</ol>", "<ul>", "</ul>", "<li>", "</li>", "<sup>", "</sup>", "<sub>", "</sub>" }; /** The Constant DOUBLE_QUOTE. */ private static final String DOUBLE_QUOTE = "\""; /** The Constant WHITESPACE_AND_QUOTES. */ private static final String WHITESPACE_AND_QUOTES = " \t\r\n\""; /** The Constant QUOTES_ONLY. */ private static final String QUOTES_ONLY = "\""; /** * Instantiates a new data filter. */ protected DataFilter() { throw new UnsupportedOperationException(); } /** * Convert the supplied HTML to plain text. * * @param html the html * @return the string */ public static String html2Text(final String html) { return Jsoup.parse(html).text(); } /** * Gets the html. * * @param original the original * * @return the html */ public static String getHtml(final String original) { String htmlString = ""; if (original != null) { StringBuffer html = new StringBuffer(original.length()); char n; for (int i = 0; i < original.length(); i++) { n = original.charAt(i); if (n == '>') { html.append(">"); } else if (n == '<') { html.append("<"); } else if (n == '&') { html.append("&"); } else { html.append(n); } } htmlString = html.toString(); } return htmlString; } /** * Gets the safe xml. * * @param input the input * * @return the safe xml */ public static String getSafeXml(final String input) { String safeXML = ""; if (StringUtils.isNotBlank(input)) { // We do not want to convert these characters for this function safeXML = StringUtils.replace(input, ">", "#gt#;"); safeXML = StringUtils.replace(safeXML, "<", "#lt#;"); safeXML = StringUtils.replace(safeXML, "&", "#amp#;"); safeXML = HTMLUtil.convertCharacterEntities(safeXML); safeXML = StringUtils.replace(safeXML, "<br>", "<br/>"); // Now convert the < and > characters back to normal safeXML = StringUtils.replace(safeXML, "#gt#;", ">"); safeXML = StringUtils.replace(safeXML, "#lt#;", "<"); safeXML = StringUtils.replace(safeXML, "#amp#;", "&"); } return safeXML; } /** * Capitalise first. * * @param input the input * * @return the string */ public static String capitaliseFirst(final String input) { String output = ""; if (input != null) { if (input.length() > 0) { output = Character.toUpperCase(input.charAt(0)) + input.substring(1); } } return output; } /** * Parses the date. * * @param strDate the str date * @param defaultcurrent the defaultcurrent * * @return the date */ public static Date parseDate(final String strDate, final boolean defaultcurrent) { Date date = null; if (strDate != null) { date = parseDate(strDate); } if (date == null && defaultcurrent) { date = Calendar.getInstance().getTime(); } return date; } /** * Parses the date. * * @param dateString the date string * @param format the format * @param defaultCurrent the default current * * @return the string */ public static String parseDate(final String dateString, final String format, final boolean defaultCurrent) { return Formatter.numericDate(parseDate(dateString, defaultCurrent), format); } /** * Parses the conventional date. * * @param dateString the date string * @param defaultCurrent the default current * * @return the string */ public static String parseConventionalDate(final String dateString, final boolean defaultCurrent) { return Formatter.conventionalDate(parseDate(dateString, defaultCurrent)); } /** * Parses the date. * * @param strDate the str date * * @return the date */ private static Date parseDate(final String strDate) { final String[] dateFormats = { "dd/MM/yyyy", "dd.MM.yyyy", "dd-MM-yyyy", "dd/M/yyyy", "dd.M.yyyy", "dd-M-yyyy", "d/M/yyyy", "d.M.yyyy", "d-M-yyyy", "EEEE dd MMMM yyyy" }; final String[] timeFormats = { "HH:mm", "hh:mm a" }; Date date = null; for (int i = 0; i < dateFormats.length; i++) { if (date == null) { final String dateFormat = dateFormats[i]; try { final SimpleDateFormat df = new SimpleDateFormat(dateFormat); date = df.parse(strDate); } catch (ParseException e) { // Error parsing date - catch but don't do anything date = null; } } } if (date == null) { // Iterate through timeFormats and add date formats for (int x = 0; x < timeFormats.length; x++) { if (date == null) { final String timeFormat = timeFormats[x]; for (int i = 0; i < dateFormats.length; i++) { if (date == null) { final String dateFormat = dateFormats[i]; try { final SimpleDateFormat df = new SimpleDateFormat(timeFormat + " " + dateFormat); date = df.parse(strDate); } catch (ParseException e) { // Error parsing date - catch but don't do anything date = null; } } } } } } return date; } /** * Parses the currency. * * @param currencyVal the currency string * * @return the double */ public static double parseCurrency(final String currencyVal) { double currency = 0; if (currencyVal != null) { String currencyString = StringUtils.replace(currencyVal, "$", ""); currencyString = StringUtils.replace(currencyString, ",", ""); try { currency = Double.parseDouble(currencyString); } catch (NumberFormatException nfe) { // Catch this exception but do nothing currency = 0; } } return currency; } /** * Parses the integer. * * @param stringValue the string value * @return the int */ public static int parseInteger(final String stringValue) { int result = 0; if (StringUtils.isNotBlank(stringValue)) { try { result = Integer.parseInt(stringValue); } catch (NumberFormatException nfe) { result = 0; } } return result; } /** * This function ensures no hazardous html formating is sent to the database * or sent back to the client. Only the specified html tags are left after * this filter, the rest are turned into > and < * * @param original the original * * @return the string */ public static String convert2XML(final String original) { String finalisedText = original; for (int i = 0; i < REPLACE_WHAT.length; i++) { finalisedText = StringUtils.replace(finalisedText, REPLACE_WHAT[i], REPLACE_TO[i]); } // Replace characters with space finalisedText = StringUtils.replace(finalisedText, " ", " "); // Now run this modified text through the HTML filter to get // rid of all the bad characters String finalisedHtml = getHtml(finalisedText); // With bad characters removed bring back the xml tags we want for (int i = 0; i < REPLACE_TO.length; i++) { finalisedHtml = StringUtils.replace(finalisedHtml, REPLACE_TO[i], ALLOWED_HTML[i]); } return finalisedHtml; } /** * Strip html comments. * * @param original the original * * @return the string */ public static String stripHtmlComments(final String original) { String html = ""; if (StringUtils.isNotBlank(original)) { String input = StringUtils.replace(original, "-->", "-->"); input = StringUtils.replace(input, "<!--", "<!--"); Document doc = Jsoup.parse(input); removeComments(doc); html = doc.body().html(); } html = Jsoup.clean(html, Whitelist.relaxed()); return StringUtils.replace(html, " ", " "); } /** * Removes the comments. * * @param node the node */ private static void removeComments(Node node) { for (int i = 0; i < node.childNodes().size();) { Node child = node.childNode(i); if (child.nodeName().equals("#comment")) child.remove(); else { removeComments(child); i++; } } } /** * Convert text2 xml. * * @param xml the xml string * * @return the string */ public static String convertText2XML(final String xml) { String xmlString = StringUtils.replace(xml, ">", ">"); xmlString = StringUtils.replace(xmlString, "<", "<"); xmlString = StringUtils.replace(xmlString, "&", "&"); return xmlString; } /** * Parses the numeric string. * * @param numericString the numeric string * * @return the int */ public static int parseNumericString(final String numericString) { String good = "0123456789"; String result = ""; if (numericString != null) { for (int i = 0; i < numericString.length(); i++) { if (good.indexOf(numericString.charAt(i)) >= 0) { result += numericString.charAt(i); } } } int resultingInt = 0; try { resultingInt = Integer.parseInt(result); } catch (NumberFormatException nfe) { resultingInt = 0; } return resultingInt; } /** * Parses the text data into a collection. * * @param text the text * * @return the collection< string> */ public static Collection<String> parseTextDataToCollection(final String text) { final Collection<String> textList = new ArrayList<String>(); if (StringUtils.isNotBlank(text)) { TreeMap<Integer, TreeMap<Integer, String>> parsedMap = DataFilter.parseTextData(text); if (parsedMap != null) { for (Integer row : parsedMap.keySet()) { TreeMap<Integer, String> colMap = parsedMap.get(row); if (colMap != null) { for (Integer col : colMap.keySet()) { final String value = colMap.get(col); if (StringUtils.isNotBlank(value)) { textList.add(value.trim()); } } } } } } return textList; } /** * Parses the text data. * * @param text the text * * @return the tree map< integer, tree map< integer, string>> */ public static TreeMap<Integer, TreeMap<Integer, String>> parseTextData(final String text) { TreeMap<Integer, TreeMap<Integer, String>> parsedData = new TreeMap<Integer, TreeMap<Integer, String>>(); // This counter holds the maximum number of columns provided int maxNumberOfTokens = 0; if (text != null) { StringTokenizer tokenizer = new StringTokenizer(text, "\n"); int lineCounter = 1; while (tokenizer.hasMoreTokens()) { String line = tokenizer.nextToken(); TreeMap<Integer, String> parsedLine = new TreeMap<Integer, String>(); final StringTokenizer tabTokenizer = new StringTokenizer(line, "\t"); if (tabTokenizer.countTokens() > 1) { parsedLine = tokenizerToMap(tabTokenizer); } else { final StringTokenizer commaTokenizer = new StringTokenizer(line, ","); parsedLine = tokenizerToMap(commaTokenizer); } if (parsedLine.size() > maxNumberOfTokens) { maxNumberOfTokens = parsedLine.size(); } parsedData.put(lineCounter, parsedLine); lineCounter++; } } // Now cycle through all the parsed data // Ensure that each row has the same (max) number of tokens for (int rowIndex : parsedData.keySet()) { TreeMap<Integer, String> parsedLine = parsedData.get(rowIndex); // This map holds the final values TreeMap<Integer, String> columnTokens = new TreeMap<Integer, String>(); for (int i = 0; i < maxNumberOfTokens; i++) { int columnIndex = i + 1; if (parsedLine.containsKey(columnIndex)) { String value = parsedLine.get(columnIndex); columnTokens.put(columnIndex, value); } else { columnTokens.put(columnIndex, ""); } } parsedData.put(rowIndex, columnTokens); } return parsedData; } /** * Tokenizer to map. * * @param tokenizer the tokenizer * * @return the tree map< integer, string> */ private static TreeMap<Integer, String> tokenizerToMap(final StringTokenizer tokenizer) { TreeMap<Integer, String> parsedData = new TreeMap<Integer, String>(); int lineCounter = 1; if (tokenizer != null) { while (tokenizer.hasMoreTokens()) { String token = tokenizer.nextToken(); parsedData.put(lineCounter, token.trim()); lineCounter++; } } return parsedData; } /** * Parse the user's search box input into a Set of String tokens. * * @param searchText the search text * * @return Set of Strings, one for each word in searchText; here "word" is * defined as either a lone word surrounded by whitespace, or as a * series of words surrounded by double quotes, "like this"; also, * very common words (and, the, etc.) do not qualify as possible * search targets. */ public static HashSet<String> parseSearchText(final String searchText) { HashSet<String> result = new HashSet<String>(); boolean returnTokens = true; String currentDelims = WHITESPACE_AND_QUOTES; StringTokenizer parser = new StringTokenizer(searchText, currentDelims, returnTokens); String token = null; while (parser.hasMoreTokens()) { token = parser.nextToken(currentDelims); if (!isDoubleQuote(token)) { addNonTrivialWordToResult(token, result); } else { currentDelims = flipDelimiters(currentDelims); } } return result; } /** * Use to determine if a particular word entered in the search box should be * discarded from the search. * * @param aSearchTokenCandidate the a search token candidate * * @return true, if checks if is common word */ private static boolean isCommonWord(final String aSearchTokenCandidate) { final Set<String> commonWords = new HashSet<String>(); commonWords.add("a"); commonWords.add("and"); commonWords.add(""); commonWords.add("for"); commonWords.add("from"); commonWords.add("has"); commonWords.add("i"); commonWords.add("in"); commonWords.add("is"); commonWords.add("it"); commonWords.add("of"); commonWords.add("on"); commonWords.add("to"); commonWords.add("the"); commonWords.add("or"); return commonWords.contains(aSearchTokenCandidate); } /** * Text has content. * * @param aText the a text * * @return true, if successful */ private static boolean textHasContent(final String aText) { return (aText != null) && (!aText.trim().equals("")); } /** * Adds the non trivial word to result. * * @param aToken the a token * @param aResult the a result */ private static void addNonTrivialWordToResult(final String aToken, final Set<String> aResult) { if (textHasContent(aToken) && !isCommonWord(aToken.trim())) { aResult.add(aToken.trim()); } } /** * Checks if is double quote. * * @param aToken the a token * * @return true, if is double quote */ private static boolean isDoubleQuote(final String aToken) { return aToken.equals(DOUBLE_QUOTE); } /** * Flip delimiters. * * @param aCurrentDelims the a current delims * * @return the string */ private static String flipDelimiters(final String aCurrentDelims) { String result = null; if (aCurrentDelims.equals(WHITESPACE_AND_QUOTES)) { result = QUOTES_ONLY; } else { result = WHITESPACE_AND_QUOTES; } return result; } }