// Java tutorial
/* * Adito * * Copyright (C) 2003-2006 3SP LTD. All Rights Reserved * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ package com.adito.boot; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; /** * <p> * A helper that provides facilities for a list of regular expressions to be * built up and the performed on some content in one go, with the replacement * value being provided by a callback method. * <p> * This class represents the notion of a replacement engine. The user of this * object creates an instance of a replacement engine then adds all of the * patterns that they wish to be search for (using * {@link #addPattern(String, Replacer, String)}. * <p> * This method also expects a {@link com.adito.boot.Replacer} * implementation to be provided. When the engine is instructed to process some * content, every time a match is found the appropriate * {@link com.adito.boot.Replacer#getReplacement(Pattern, Matcher, String)} * method will be called. The value that is returned from this method will then * replace the matched string. 
* <p> * The two primary uses for this class can be found in the <i>Replacement proxy</i> * feature and the <i>Appplication Extension</i>. Replacement proxy uses it to * replace hyperlinks with HTML content with proxied links and the Application * Extension uses it to dynamically replace provided application arguments with * pre-defined strings. * <p> * As processing regular expressions can be a fairly intensive task , a cached * pool of compile regular expression is also maintained as they may be re-used. */ public class ReplacementEngine { final static Log log = LogFactory.getLog(ReplacementEngine.class); // Private instance variables private final StringBuffer inputBuffer = new StringBuffer(); private final StringBuffer workBuffer = new StringBuffer(); private List replacementsList = new ArrayList(); private boolean caseSensitive = true; private boolean dotAll = false; private static PatternPool patternPool; private String charset; private Encoder encoder; /** * Constructor */ public ReplacementEngine() { patternPool = new PatternPool(); } /** * Set the {@link Encoder} implementation to use. This is a callback * interface that has the oppurtunity to do post processing on any * replaced values. * * @param encoder */ public void setEncoder(Encoder encoder) { this.encoder = encoder; } /** * Set the character encoding of the content to replace * * @param charset character encoding */ public void setEncoding(String charset) { this.charset = charset; } /** * Get the instance of the pattern pool. 
* * @return instance of pattern pool */ public static PatternPool getPatternPool() { if (patternPool == null) { patternPool = new PatternPool(); } return patternPool; } /** * Set whether every match on every record should be processed * * @param dotAll process every match on every record */ public void setDotAll(boolean dotAll) { this.dotAll = dotAll; } /** * Set whether the matching is case sensitive * * @param caseSensitive is case sensitve */ public void setCaseSensitive(boolean caseSensitive) { this.caseSensitive = caseSensitive; } /** * Add a new pattern to the replacement engine. * * @param pattern patter to search for * @param replacer replace containing callback to get replacement value * @param replacementPattern optional replacement pattern (replacer * implementation may require it) */ public synchronized void addPattern(String pattern, Replacer replacer, String replacementPattern) { // Pattern p = Pattern.compile(pattern, ( caseSensitive ? 0 : // Pattern.CASE_INSENSITIVE ) + ( dotAll ? 
Pattern.DOTALL : 0 ) ); replacementsList.add(new ReplaceOp(replacer, pattern, replacementPattern)); } /** * Replace all occurences of all registered pattern in the provided string * and return the result as a second string * * @param input input string * @return processed string */ public synchronized String replace(String input) { Iterator it = replacementsList.iterator(); inputBuffer.setLength(0); inputBuffer.append(input); workBuffer.setLength(0); workBuffer.ensureCapacity(input.length()); if (log.isDebugEnabled()) log.debug("Starting replacement on string on " + input.length() + " characters"); while (it.hasNext()) { ReplaceOp op = (ReplaceOp) it.next(); if (log.isDebugEnabled()) log.debug("Replacemnt " + op.replacePattern + " [" + op.pattern + "]"); Pattern p = getPatternPool().getPattern(op.pattern, caseSensitive, dotAll); if (log.isDebugEnabled()) log.debug("Got pattern from pool"); try { replaceInto(p, op.replacePattern, op.replacer, inputBuffer, workBuffer); if (log.isDebugEnabled()) log.debug("Replacement complete"); } catch (Throwable t) { if (log.isDebugEnabled()) log.debug("Error replacing.", t); } finally { if (log.isDebugEnabled()) log.debug("Releasing pattern from pool."); patternPool.releasePattern(p); } inputBuffer.setLength(0); inputBuffer.append(workBuffer); } if (log.isDebugEnabled()) log.debug("Finished replacing. Returning string of " + inputBuffer.length() + "characters"); return (inputBuffer.toString()); } /** * Replace all occurences of all registered patterns in all data read from * the input stream and write the processed content back to the provided * output stream. * <p> * <b>Note, this method current reads the entire stream into memory as a * string before performing the replacements. Beware of this when are using * this method. 
A more efficient method may come later.</b> * * @param in input stream * @param out output stream * @return bytes ready * @throws IOException on any IO error */ public long replace(InputStream in, OutputStream out) throws IOException { if (log.isDebugEnabled()) log.debug("Replacing using streams, reading stream into memory"); StringBuffer str = new StringBuffer(4096); byte[] buf = new byte[32768]; int read; while ((read = in.read(buf)) > -1) { str.append(charset == null ? new String(buf, 0, read) : new String(buf, 0, read, charset)); if (log.isDebugEnabled()) log.debug("Got block of " + read + ", waiting for next one"); } if (log.isDebugEnabled()) log.debug("Read all blocks, performing replacement"); byte[] b = charset == null ? replace(str.toString()).getBytes() : replace(str.toString()).getBytes(charset); if (log.isDebugEnabled()) log.debug("Writing replaced content back (" + b.length + " bytes)"); out.write(b); return b.length; } // Supporting methods private void replaceInto(Pattern pattern, String replacementPattern, Replacer replacer, StringBuffer input, StringBuffer work) { work.ensureCapacity(input.length()); work.setLength(0); if (log.isDebugEnabled()) log.debug("Getting matcher for " + pattern.pattern()); Matcher m = pattern.matcher(input); log.debug("Got matcher, finding first occurence."); while (m.find()) { if (log.isDebugEnabled()) log.debug("Found occurence '" + m.group() + "'"); String repl = replacer.getReplacement(pattern, m, replacementPattern); if (repl != null) { if (log.isDebugEnabled()) log.debug("Found replacement, appending '" + repl + "'"); if (encoder == null) { m.appendReplacement(work, Util.escapeForRegexpReplacement(repl)); } else { m.appendReplacement(work, encoder.encode(Util.escapeForRegexpReplacement(repl))); } } } if (log.isDebugEnabled()) log.debug("Processed matches, appending replacement."); m.appendTail(work); } // Supporting classes class ReplaceOp { String pattern; Replacer replacer; String replacePattern; ReplaceOp(Replacer 
replacer, String pattern, String replacePattern) { this.replacer = replacer; this.pattern = pattern; this.replacePattern = replacePattern; } } /** * A cached pool of compiled regular expressions. */ public static class PatternPool { private HashMap patterns; private HashMap locks; PatternPool() { patterns = new HashMap(); locks = new HashMap(); } /** * Get a compiled {@link Pattern} given the regular expression as text, * whether the match should be case sensitive and whether all matches on * every record (line) should be processed. * <p> * When a pattern is first requested a pool of 10 instances are created. * Sub-sequent requests for the same pattern will then return one of * these instances. The pattern will then be locked until * {@link #releasePattern(Pattern)} is called. If there are no unlocked * patterns in the pool, the caller will be blocked until one becomes * available. * * @param pattern pattern * @param caseSensitive case sensitive match * @param dotAll match all matches on a single record * @return compiled patter */ public Pattern getPattern(String pattern, boolean caseSensitive, boolean dotAll) { String cacheKey = pattern + "_" + caseSensitive + "_" + dotAll; List pool = null; synchronized (patterns) { pool = (List) patterns.get(cacheKey); if (pool == null) { pool = new ArrayList(); patterns.put(cacheKey, pool); } } synchronized (pool) { while (true) { if (pool.size() < 10) { Pattern p = Pattern.compile(pattern, (!caseSensitive ? Pattern.CASE_INSENSITIVE : 0) + (dotAll ? 
Pattern.DOTALL : 0)); pool.add(p); locks.put(p, p); if (log.isDebugEnabled()) log.debug("Created new pattern and locked"); return p; } else { for (Iterator i = pool.listIterator(); i.hasNext();) { Pattern p = (Pattern) i.next(); if (!locks.containsKey(p)) { if (log.isDebugEnabled()) log.debug("Found a free pattern"); locks.put(p, p); return p; } } synchronized (locks) { try { if (log.isDebugEnabled()) log.debug("No free patterns, waiting for one to become available"); locks.wait(); } catch (Exception e) { } } } } } } /** * Release a patterns lock. * * @param pattern pattern to release * @see #getPattern(String, boolean, boolean) */ public void releasePattern(Pattern pattern) { synchronized (locks) { locks.remove(pattern); locks.notifyAll(); } } } public interface Encoder { public String encode(String decoded); } }