de.fhg.iais.asc.transformer.jdom.handler.XmlFieldStripperTest.java Source code

Java tutorial

Introduction

Here is the source code for de.fhg.iais.asc.transformer.jdom.handler.XmlFieldStripperTest.java.

Source

package de.fhg.iais.asc.transformer.jdom.handler;

/******************************************************************************
 * Copyright 2011 (c) Fraunhofer IAIS Netmedia  http://www.iais.fraunhofer.de *
 * ************************************************************************** *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may    *
 * not use this file except in compliance with the License.                   *
 * You may obtain a copy of the License at                                    *
 * http://www.apache.org/licenses/LICENSE-2.0                                 *
 * Unless required by applicable law or agreed to in writing,                 *
 * software distributed under the License is distributed on an "AS IS" BASIS, *
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   *
 * See the License for the specific language governing permissions and        *
 * limitations under the License.                                             *
 ******************************************************************************/

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.jdom.Document;
import org.junit.Assert;
import org.junit.Test;

import de.fhg.iais.cortex.model.aip.util.XmlProcessor;
import de.fhg.iais.cortex.model.aip.util.XmlUtils;

/**
 * Tests for {@link XmlFieldStripper}.
 * <p>
 * Each stripper test builds a JDOM document from an inline XML string, runs it through an
 * {@link XmlFieldStripper} via the inherited {@code transform} helper, serializes the root element
 * back to a string and counts how often selected markers occur in that string.
 */
public class XmlFieldStripperTest extends AbstractXmlFieldHandlerTest {

    /** Three {@code creator} records whose text contains whitespace runs and punctuation; '_' and '`' each occur twice. */
    private static final String XML = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?><root><header/><body><listing>"
            + "<record no=\"1\"><creator>Baal \n\t  `[3]-_{bde}   </creator></record>"
            + "<record no=\"2\"><creator>    \nMeier \n\t  `[3]-_ </creator></record>"
            + "<record no=\"3\"><creator>**+#=\" Schulze, Meier</creator></record>" + "</listing></body></root>";

    /** Minimal document with a single text node {@code MYTEXT}. */
    private static final String XML2 = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?><root><node>MYTEXT</node></root>";

    /** Field pattern handed to the stripper; {@code null} here. Alternative value noted by the original author: "//creator". */
    private static final String FIELDPATTERN = null;

    /** Content pattern handed to the stripper; {@code null} here. Alternative value noted by the original author: "(.|\\s)*". */
    private static final String CONTENTPATTERN = null;

    /**
     * Runs the stripper over {@link #XML} and expects the serialized result to contain exactly two
     * '_' and exactly two '`' characters, i.e. as many as the input contains.
     *
     * @throws Exception if building, transforming or serializing the document fails
     */
    @Test
    public void testSampler() throws Exception {
        final Document source = XmlProcessor.buildDocumentFrom(XML);
        final XmlFieldStripper stripper = new XmlFieldStripper(FIELDPATTERN, CONTENTPATTERN);
        final Document stripped = this.transform(stripper, source);

        final String output = XmlUtils.elementToString(true, stripped.getRootElement());

        // The serialized output is part of the failure message, so it is only shown when a check fails.
        Assert.assertEquals("Unexpected number of '_' in output: " + output, 2, StringUtils.countMatches(output, "_"));
        Assert.assertEquals("Unexpected number of '`' in output: " + output, 2, StringUtils.countMatches(output, "`"));
    }

    /**
     * Runs the stripper over {@link #XML2} and expects the text {@code MYTEXT} to occur exactly once
     * in the serialized result.
     *
     * @throws Exception if building, transforming or serializing the document fails
     */
    @Test
    public void testSampler2() throws Exception {
        final Document source = XmlProcessor.buildDocumentFrom(XML2);
        final XmlFieldStripper stripper = new XmlFieldStripper(FIELDPATTERN, CONTENTPATTERN);
        final Document stripped = this.transform(stripper, source);

        final String output = XmlUtils.elementToString(true, stripped.getRootElement());

        Assert.assertEquals("Unexpected number of 'MYTEXT' in output: " + output, 1, StringUtils.countMatches(output, "MYTEXT"));
    }

    /**
     * Checks that the pattern {@code (?s).*} matches a long descriptive text in its entirety.
     * <p>
     * NOTE(review): presumably a regression guard for a matcher problem seen with long field
     * contents; the test name suggests this, but the cause is not visible here -- confirm.
     *
     * @throws Exception declared for uniformity with the other tests
     */
    @Test
    public void testMatcherBug() throws Exception {
        final String teststring = "Aus: Braun/Hogenberg, 4, 31. Beschreibung in der Kartusche am unteren Bildrand: \"Gottorpia Fortibus  Gottis habet arx "
                + "Gottorpia nomen, Holsatici dudum quam tenure duces. Jam studet hanc nec non munire laborat Adolphus, "
                + "Spe ibi quod Princeps inclitus esse solet. Slesuigum. Panicies inter Scotos spec tatur et Anglos Quae licet "
                + "exigua est, quora bina secat Et chersonesi Cimbrorum dicitur estus Non procul  Trena flumine uelifera. "
                + "Cingitur hic Is`thmus uasso, uassumqu uocatur Danorum, struxit quod Godefridus opus Slesuigum porro nos illam "
                + "dicimus inde, Quod maris ad curuum est dificata sinum, Ampla fuit quondam, "
                + "portuqu excepit amico Omnigenas merces, quas maris vnda uehit Res ea decreuit sic vt non amplius vrbis Sed "
                + "tantum oppiduli, nomen habere queat. Ex Generosi ac Nobilis Viri D. HENRICI RANSOVII Muso M. D. LXXX IV. "
                + "Coloni Cum priuilegio C?sareo. A. Pons lapideus B. Castrum D. Georgij, "
                + "vulgo. Die Jrgens Burch C. Vetus delubrum; Nunc, Diuo Michaeli dicatum. D. Ecclesia Cathedralis. E. Episcopi "
                + "domus. F. Turris S. Spiritus, dificata  negotiatoribus Anglis G. Curia. H. Reninsula, "
                + "Holm germanic dicta I. Monasterium dicatar deo nobilium uirginum K. Hic incipit munimentum Gotfridi Danorum "
                + "Regis Inde nomen germanic obtinens, Dennewerck. Ductum ab orientali Maris sinu, vsque ad Occidentalem Oceanum.";

        // (?s) enables DOTALL, so '.' also matches line terminators.
        final Pattern contentPattern = Pattern.compile("(?s).*");
        final Matcher matcher = contentPattern.matcher(teststring);
        Assert.assertTrue("Pattern (?s).* should match the whole test string", matcher.matches());
    }
}