// Java tutorial
/**
 * Copyright (C) 2011 rwitzel75@googlemail.com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.github.rwitzel.streamflyer.xml;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.input.XmlStreamReader;

import com.github.rwitzel.streamflyer.core.AfterModification;
import com.github.rwitzel.streamflyer.core.Modifier;
import com.github.rwitzel.streamflyer.internal.thirdparty.ZzzValidate;
import com.github.rwitzel.streamflyer.util.ModificationFactory;

/**
 * Replaces the XML version in the XML prolog with the given XML version. Adds an XML prolog if the stream does not
 * contain an XML prolog.
 * <p>
 * <h1>Contents</h1>
 * <p>
 * <b> <a href="#g1">1. How and when do I use this modifier?</a><br/>
 * <a href="#g2">2. Do I have to care about BOMs at the beginning of the stream?</a> <br/>
 * <a href="#g3">3. Is there any known limitation?</a> <br/>
 * <a href="#g4">4. How much memory does the modifier consume?</a><br/>
 * </b> <!-- ++++++++++++++++++++++++++++++ -->
 * <p>
 * <h3 id="g1">1. How and when do I use this modifier?</h3>
 * <p>
 * This modifier is an alternative to {@link InvalidXmlCharacterModifier} if you have characters in an XML stream that
 * are valid for XML 1.1 documents but invalid for XML 1.0 documents. In this case you use this modifier to change the
 * XML version in the prolog of the document.
 * <p>
 * EXAMPLE:
 * <code><pre class="prettyprint lang-java">// choose the input stream to modify
 * ByteArrayInputStream inputStream = new ByteArrayInputStream(
 *         bytesWithBom);
 *
 * // wrap the input stream by BOM skipping reader
 * Reader reader = new XmlStreamReader(inputStream);
 *
 * // create the reader that changes the XML version to 1.1
 * ModifyingReader modifyingReader = new ModifyingReader(reader,
 *         new XmlVersionModifier("1.1", 8192));
 *
 * // use the modifying reader instead of the original reader
 * String xml = IOUtils.toString(modifyingReader);
 *
 * assertTrue(xml.startsWith("&lt;?xml version='1.1'"));
 * </pre></code>
 * <h3 id="g2">2. Do I have to care about BOMs at the beginning of the stream?</h3>
 * <p>
 * Yes, you must use a BOM skipping reader that wraps the input stream. Apache's Commons IO {@link XmlStreamReader} does
 * this for you.
 * <h3 id="g3">3. Is there any known limitation?</h3>
 * <p>
 * Yes, this modifier throws a {@link XmlPrologRidiculouslyLongException} if the prolog of the XML document contains
 * more than {@link #INITIAL_NUMBER_OF_CHARACTERS} characters. This can only happen if there is a lot of whitespace
 * within the prolog, which is highly unlikely but not forbidden by the XML specification. You should know that even the
 * <code>XmlReader</code> of Apache Commons which you probably use to detect the encoding cannot deal with such a kind
 * of prolog.
 * <h3 id="g4">4. How much memory does the modifier consume?</h3>
 * <p>
 * The memory consumption of this modifier during the stream processing is roughly given by the second argument of
 * {@link #XmlVersionModifier(String, int)} but the initial memory consumption is given by
 * {@link #INITIAL_NUMBER_OF_CHARACTERS}.
 *
 * @author rwoo
 * @since 27.06.2011
 */
public class XmlVersionModifier implements Modifier {

    //
    // constants
    //

    /**
     * The number of characters this modifier requests from the stream before it looks for the XML prolog.
     * <p>
     * Declared <code>static</code> because the value is the same for every instance; existing code that reads it
     * through an instance reference still compiles.
     */
    public static final int INITIAL_NUMBER_OF_CHARACTERS = 4096;

    /**
     * Matches the beginning of an XML declaration up to and including the quoted version number. Group 1 is the
     * version number (<code>1.0</code> or <code>1.1</code>).
     * <p>
     * The pattern is meant to be used with {@link Matcher#lookingAt()}: it deliberately does not try to match the
     * rest of the buffer, because <code>.</code> does not match line terminators and the document usually contains
     * line breaks after the prolog.
     */
    private static final Pattern PROLOG_WITH_VERSION = Pattern
            .compile("<\\?xml\\s[^>]*version\\s*=\\s*['\"](1\\.[01])['\"]");

    /**
     * Matches the start of an XML declaration: <code>&lt;?xml</code> followed by whitespace. The whitespace is
     * required so that processing instructions like <code>&lt;?xml-stylesheet ...?&gt;</code> are not mistaken for a
     * prolog.
     */
    private static final Pattern PROLOG_START = Pattern.compile("<\\?xml\\s");

    /**
     * The internal state of {@link XmlVersionModifier}.
     * <p>
     * The state transitions are: from <code>INITIAL</code> to <code>PROLOG_REQUEST</code> to
     * <code>NO_LONGER_MODIFYING</code>.
     */
    private enum XmlVersionModifierState {
        /**
         * The initial state. No input read yet.
         */
        INITIAL,
        /**
         * The modifier has requested to read the XML prolog.
         */
        PROLOG_REQUEST,
        /**
         * The modifier has read the XML prolog, modified it if necessary. Nothing more to do for the modifier.
         */
        NO_LONGER_MODIFYING
    }

    //
    // injected properties
    //

    protected ModificationFactory factory;

    protected String xmlVersion;

    //
    // properties that represent the mutable state
    //

    private XmlVersionModifierState state = XmlVersionModifierState.INITIAL;

    //
    // constructors
    //

    /**
     * Creates a modifier that sets the XML version of the prolog to the given version.
     *
     * @param xmlVersion
     *            the XML version to write into the prolog, e.g. <code>"1.1"</code>. Must not be <code>null</code>.
     * @param newNumberOfChars
     *            passed as second argument to the {@link ModificationFactory}; according to the class documentation
     *            this roughly determines the memory consumption during the stream processing.
     */
    public XmlVersionModifier(String xmlVersion, int newNumberOfChars) {

        ZzzValidate.notNull(xmlVersion, "xmlVersion must not be null");

        this.factory = new ModificationFactory(0, newNumberOfChars);
        this.xmlVersion = xmlVersion;
    }

    //
    // Modifier.* methods
    //

    /**
     * Drives the state machine of this modifier: the first call requests
     * {@link #INITIAL_NUMBER_OF_CHARACTERS} characters, the second call replaces or inserts the XML prolog, and all
     * later calls skip the buffer unmodified.
     *
     * @throws XmlPrologRidiculouslyLongException
     *             if the buffer starts with an XML declaration but no supported version (1.0 or 1.1) is found in it
     * @see com.github.rwitzel.streamflyer.core.Modifier#modify(java.lang.StringBuilder, int, boolean)
     */
    @Override
    public AfterModification modify(StringBuilder characterBuffer, int firstModifiableCharacterInBuffer,
            boolean endOfStreamHit) {

        switch (state) {

        case NO_LONGER_MODIFYING:

            return factory.skipEntireBuffer(characterBuffer, firstModifiableCharacterInBuffer, endOfStreamHit);

        case INITIAL:

            state = XmlVersionModifierState.PROLOG_REQUEST;

            // you never know how many whitespace characters are in the prolog
            return factory.modifyAgainImmediately(INITIAL_NUMBER_OF_CHARACTERS, firstModifiableCharacterInBuffer);

        case PROLOG_REQUEST:

            replaceOrInsertProlog(characterBuffer);

            state = XmlVersionModifierState.NO_LONGER_MODIFYING;

            // the buffer is passed after the modification so that the factory sees the final buffer content
            return factory.skipEntireBuffer(characterBuffer, firstModifiableCharacterInBuffer, endOfStreamHit);

        default:
            throw new IllegalStateException("state " + state + " not supported");
        }

    }

    /**
     * Replaces the version in the XML prolog at the start of the buffer, or inserts a new prolog if the buffer does
     * not start with one.
     * <p>
     * (Should we be aware of BOMs here? No. I consider it the responsibility of the caller to provide characters
     * without BOM.)
     *
     * @param characterBuffer
     *            the buffer that is expected to start with the beginning of the document
     * @throws XmlPrologRidiculouslyLongException
     *             if the buffer starts with an XML declaration but no supported version is found in it
     */
    private void replaceOrInsertProlog(StringBuilder characterBuffer) {

        Matcher versionMatcher = PROLOG_WITH_VERSION.matcher(characterBuffer);

        // lookingAt() anchors at the start of the buffer but does not require the rest of the document to match
        if (versionMatcher.lookingAt()) {
            // replace version in prolog
            characterBuffer.replace(versionMatcher.start(1), versionMatcher.end(1), xmlVersion);
            return;
        }

        // is there a prolog that is too long?
        if (PROLOG_START.matcher(characterBuffer).lookingAt()) {
            // this is not normal at all -> throw exception
            throw new XmlPrologRidiculouslyLongException(characterBuffer.toString());
        }

        // insert prolog (an XML declaration must be terminated by "?>")
        characterBuffer.insert(0, "<?xml version='" + xmlVersion + "'?>");
    }
}