Java tutorial
package de.fhg.iais.asc.transformer.jdom.handler;

/******************************************************************************
 * Copyright 2011 (c) Fraunhofer IAIS Netmedia http://www.iais.fraunhofer.de  *
 * ************************************************************************** *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may    *
 * not use this file except in compliance with the License.                   *
 * You may obtain a copy of the License at                                    *
 * http://www.apache.org/licenses/LICENSE-2.0                                 *
 * Unless required by applicable law or agreed to in writing,                 *
 * software distributed under the License is distributed on an "AS IS" BASIS, *
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   *
 * See the License for the specific language governing permissions and        *
 * limitations under the License.                                             *
 ******************************************************************************/

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.jdom.Document;
import org.junit.Assert;
import org.junit.Test;

import de.fhg.iais.cortex.model.aip.util.XmlProcessor;
import de.fhg.iais.cortex.model.aip.util.XmlUtils;

/**
 * Tests for {@link XmlFieldStripper}, run with {@code null} field and content patterns.
 * <p>
 * Each stripper test parses a fixed XML string, passes it through an {@link XmlFieldStripper}
 * via the inherited {@code transform} helper, serializes the result and counts how often
 * selected marker strings occur in the output.
 */
public class XmlFieldStripperTest extends AbstractXmlFieldHandlerTest {

    /** Three {@code creator} records whose text mixes names with whitespace and punctuation. */
    private static final String XML = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?><root><header/><body><listing>"
        + "<record no=\"1\"><creator>Baal \n\t `[3]-_{bde} </creator></record>"
        + "<record no=\"2\"><creator> \nMeier \n\t `[3]-_ </creator></record>"
        + "<record no=\"3\"><creator>**+#=\" Schulze, Meier</creator></record>"
        + "</listing></body></root>";

    /** Minimal document with a single text node. */
    private static final String XML2 = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?><root><node>MYTEXT</node></root>";

    /** Field pattern handed to the stripper; an explicit alternative would be {@code "//creator"}. */
    private static final String FIELDPATTERN = null;

    /** Content pattern handed to the stripper; an explicit alternative would be {@code "(.|\\s)*"}. */
    private static final String CONTENTPATTERN = null;

    /**
     * Checks that the output for {@link #XML} contains exactly two underscores and two backticks,
     * which is the number of each present in the input.
     *
     * @throws Exception if parsing, transforming or serializing the document fails
     */
    @Test
    public void testSampler() throws Exception {
        String output = stripAndSerialize(XML);

        Assert.assertEquals("occurrences of '_' in output", 2, StringUtils.countMatches(output, "_"));
        Assert.assertEquals("occurrences of '`' in output", 2, StringUtils.countMatches(output, "`"));
    }

    /**
     * Checks that the text {@code MYTEXT} occurs exactly once in the output for {@link #XML2}.
     *
     * @throws Exception if parsing, transforming or serializing the document fails
     */
    @Test
    public void testSampler2() throws Exception {
        String output = stripAndSerialize(XML2);

        Assert.assertEquals("occurrences of 'MYTEXT' in output", 1, StringUtils.countMatches(output, "MYTEXT"));
    }

    /**
     * Checks that the pattern {@code (?s).*} matches a long multi-sentence string in full.
     * <p>
     * NOTE(review): presumably a regression guard related to the alternation form
     * {@code (.|\s)*} mentioned next to {@link #CONTENTPATTERN} - confirm.
     *
     * @throws Exception declared for consistency with the other tests
     */
    @Test
    public void testMatcherBug() throws Exception {
        // NOTE(review): the literal ending in "LXXX IV. " was broken by a raw line break in the
        // source, which cannot compile; it is closed on one line here. Some non-ASCII characters
        // of the sample text also look lost (e.g. "C?sareo") - confirm against the original.
        String teststring = "Aus: Braun/Hogenberg, 4, 31. Beschreibung in der Kartusche am unteren Bildrand: \"Gottorpia Fortibus Gottis habet arx "
            + "Gottorpia nomen, Holsatici dudum quam tenure duces. Jam studet hanc nec non munire laborat Adolphus, "
            + "Spe ibi quod Princeps inclitus esse solet. Slesuigum. Panicies inter Scotos spec tatur et Anglos Quae licet "
            + "exigua est, quora bina secat Et chersonesi Cimbrorum dicitur estus Non procul Trena flumine uelifera. "
            + "Cingitur hic Is`thmus uasso, uassumqu uocatur Danorum, struxit quod Godefridus opus Slesuigum porro nos illam "
            + "dicimus inde, Quod maris ad curuum est dificata sinum, Ampla fuit quondam, "
            + "portuqu excepit amico Omnigenas merces, quas maris vnda uehit Res ea decreuit sic vt non amplius vrbis Sed "
            + "tantum oppiduli, nomen habere queat. Ex Generosi ac Nobilis Viri D. HENRICI RANSOVII Muso M. D. LXXX IV. "
            + "Coloni Cum priuilegio C?sareo. A. Pons lapideus B. Castrum D. Georgij, "
            + "vulgo. Die Jrgens Burch C. Vetus delubrum; Nunc, Diuo Michaeli dicatum. D. Ecclesia Cathedralis. E. Episcopi "
            + "domus. F. Turris S. Spiritus, dificata negotiatoribus Anglis G. Curia. H. Reninsula, "
            + "Holm germanic dicta I. Monasterium dicatar deo nobilium uirginum K. Hic incipit munimentum Gotfridi Danorum "
            + "Regis Inde nomen germanic obtinens, Dennewerck. Ductum ab orientali Maris sinu, vsque ad Occidentalem Oceanum.";

        Pattern cp = Pattern.compile("(?s).*");
        Matcher m = cp.matcher(teststring);
        Assert.assertTrue(m.matches());
    }

    /**
     * Parses {@code xml}, runs it through a stripper built from {@link #FIELDPATTERN} and
     * {@link #CONTENTPATTERN}, and serializes the resulting root element.
     * The serialized output is also printed to standard out.
     *
     * @param xml the XML document text to process
     * @return the serialized root element of the transformed document
     * @throws Exception if parsing, transforming or serializing the document fails
     */
    private String stripAndSerialize(String xml) throws Exception {
        Document xdoc = XmlProcessor.buildDocumentFrom(xml);
        XmlFieldStripper s = new XmlFieldStripper(FIELDPATTERN, CONTENTPATTERN);
        xdoc = this.transform(s, xdoc);
        String output = XmlUtils.elementToString(true, xdoc.getRootElement());
        System.out.println(output);
        return output;
    }
}