// Java tutorial
/* * Copyright (c) 2010-2011, Martijn Brinkers, Djigzo. * * This file is part of Djigzo email encryption. * * Djigzo is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License * version 3, 19 November 2007 as published by the Free Software * Foundation. * * Djigzo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public * License along with Djigzo. If not, see <http://www.gnu.org/licenses/> * * Additional permission under GNU AGPL version 3 section 7 * * If you modify this Program, or any covered work, by linking or * combining it with aspectjrt.jar, aspectjweaver.jar, tyrex-1.0.3.jar, * freemarker.jar, dom4j.jar, mx4j-jmx.jar, mx4j-tools.jar, * spice-classman-1.0.jar, spice-loggerstore-0.5.jar, spice-salt-0.8.jar, * spice-xmlpolicy-1.0.jar, saaj-api-1.3.jar, saaj-impl-1.3.jar, * wsdl4j-1.6.1.jar (or modified versions of these libraries), * containing parts covered by the terms of Eclipse Public License, * tyrex license, freemarker license, dom4j license, mx4j license, * Spice Software License, Common Development and Distribution License * (CDDL), Common Public License (CPL) the licensors of this Program grant * you additional permission to convey the resulting work. 
*/
package mitm.common.extractor.impl;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;

import mitm.common.extractor.ExtractedPart;
import mitm.common.extractor.TextExtractorContext;
import mitm.common.extractor.TextExtractorEventHandler;
import mitm.common.util.RewindableInputStream;
import mitm.common.util.FileConstants;
import mitm.common.util.LimitReachedException;
import mitm.common.util.SizeUtils;
import mitm.test.TestUtils;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.CharEncoding;
import org.apache.commons.lang.NotImplementedException;
import org.apache.log4j.BasicConfigurator;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * Tests for {@link UnicodeTextExtractor}.
 * <p>
 * Every test feeds a document from {@code test/resources/testdata/documents}
 * to the extractor and inspects the parts reported to the event handler.
 * After each test the collected parts and opened streams are released and the
 * number of temp files is compared with the number counted before the test.
 *
 * @author Martijn Brinkers
 *
 */
public class UnicodeTextExtractorTest {
    /** Text parts reported by the extractor during the current test. */
    private static final List<ExtractedPart> textParts = new LinkedList<ExtractedPart>();

    /** Streams opened by {@link #readDocument(String)} that must be closed after the test. */
    private static final List<RewindableInputStream> toClose = new LinkedList<RewindableInputStream>();

    /** Handler passed to the extractor; it records every text part in {@link #textParts}. */
    private final TextExtractorEventHandler handler = new TextExtractorEventHandlerImpl();

    /** Number of temp files counted right before the current test started. */
    private static int tempFileCount;

    /**
     * Event handler that collects text parts and rejects attachments.
     */
    private static class TextExtractorEventHandlerImpl implements TextExtractorEventHandler {
        /**
         * Records the extracted text part so the test can inspect it.
         */
        @Override
        public void textEvent(ExtractedPart textPart) throws IOException {
            textParts.add(textPart);
        }

        /**
         * Always fails: the documents used by these tests are plain text files.
         */
        @Override
        public void attachmentEvent(ExtractedPart attachmentPart) {
            throw new NotImplementedException("text file should not have attachments.");
        }
    }

    /**
     * Closes every part in the list and empties the list.
     * <p>
     * The list is emptied even when closing a part fails. Otherwise stale
     * parts would remain in the shared static list and break the size checks
     * of the tests that run afterwards.
     *
     * @param parts the parts to close; the list is cleared on return
     * @throws IOException declared for failures while closing a part
     */
    private void cleanParts(List<ExtractedPart> parts) throws IOException {
        try {
            for (ExtractedPart part : parts) {
                part.close();
            }
        } finally {
            parts.clear();
        }
    }

    /**
     * Quietly closes every stream in the list and empties the list.
     *
     * @param streams the streams to close; the list is cleared on return
     * @throws IOException declared for signature compatibility
     */
    private void closeInputStreams(List<RewindableInputStream> streams) throws IOException {
        for (RewindableInputStream stream : streams) {
            IOUtils.closeQuietly(stream);
        }

        streams.clear();
    }

    /**
     * Opens a test document as a {@link RewindableInputStream} and registers
     * the stream so that it gets closed after the test.
     *
     * @param filename name of the file below {@code test/resources/testdata/documents}
     * @return the opened stream
     * @throws FileNotFoundException if the file cannot be opened
     */
    private static RewindableInputStream readDocument(String filename) throws FileNotFoundException {
        File document = new File("test/resources/testdata/documents", filename);

        RewindableInputStream stream = new RewindableInputStream(
                new BufferedInputStream(new FileInputStream(document)), SizeUtils.MB * 1);

        toClose.add(stream);

        return stream;
    }

    /**
     * Configures log4j once for the whole test class.
     */
    @BeforeClass
    public static void beforeClass() throws Exception {
        BasicConfigurator.configure();
    }

    /**
     * Remembers how many temp files exist before the test runs.
     */
    @Before
    public void before() {
        // get the current nr of temp files
        tempFileCount = TestUtils.getTempFileCount(FileConstants.TEMP_FILE_PREFIX, ".tmp");
    }

    /**
     * Releases everything the test collected and verifies that the test did
     * not leave temp files behind.
     *
     * @throws IOException if releasing the extracted parts fails
     */
    @After
    public void after() throws IOException {
        try {
            cleanParts(textParts);
        } finally {
            // always release the input streams, even when closing a part failed
            closeInputStreams(toClose);
        }

        // check if we have any temp file leakage
        assertEquals(tempFileCount, TestUtils.getTempFileCount(FileConstants.TEMP_FILE_PREFIX, ".tmp"));
    }

    /**
     * Asserts that the extracted text equals the expected note document
     * preceded by the given header line.
     *
     * @param text the extracted text
     * @param header the expected first line (the XML declaration)
     */
    private void checkText(String text, String header) {
        // NOTE(review): the sample characters inside the <bodyN> elements look as if they were
        // lost or replaced by '?' at some point - confirm these literals against the fixture file.
        String expected = header + "\r\n<note>\r\n"
                + " <from>Santa Claus</from>\r\n"
                + " <to>Donald Duck</to>\r\n"
                + "\r\n"
                + " <body1>Norwegian: </body1>\r\n"
                + " <body2>French: </body2>\r\n"
                + " <body3>Finnish: </body3>\r\n"
                + " <body4>Greek: </body4>\r\n"
                + " <body5>Cyrillic: </body5>\r\n"
                + " <body6>Arabic: ?</body6>\r\n"
                + " <body7>Hebrew: ?</body7>\r\n"
                + " <body8>Symbols: </body8>\r\n"
                + " <body9>Gujarati: ?</body9>\r\n"
                + " <body10>Tamil: </body10>\r\n"
                + " <body11>Malayalam: </body11>\r\n"
                + " <body12>More symbols: </body12>\r\n"
                + " <body13>Arrows: ?</body13>\r\n"
                + " <body14>Circled digits: </body14>\r\n"
                + " <body15>Circled letters: </body15>\r\n"
                + " <body16>Chess: ?</body16>\r\n"
                + " <body17>Flowers: ???????</body17>\r\n"
                + " <body18>Units: ??</body18>\r\n"
                + " <body19>CJK Ideograph: </body19>\r\n"
                + " <body20>Katakana: </body20>\r\n"
                + "\r\n"
                + "</note>\r\n";

        assertEquals(expected, text);
    }

    /**
     * Extracts {@code xmlUTF8.xml} with a named context and checks that one
     * part is produced, that the part carries the context name and that its
     * content matches the expected text.
     */
    @Test
    public void testUTF8BOM() throws Exception {
        UnicodeTextExtractor extractor = new UnicodeTextExtractor(1, Integer.MAX_VALUE);

        TextExtractorContext context = new TextExtractorContextImpl();

        context.setName("xmlUTF8.xml");

        extractor.extract(readDocument("xmlUTF8.xml"), context, handler);

        assertEquals(1, textParts.size());

        ExtractedPart part = textParts.get(0);

        assertEquals("xmlUTF8.xml", part.getContext().getName());

        String text = IOUtils.toString(part.getContent(), CharEncoding.UTF_8);

        checkText(text, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
    }

    /**
     * Extracts {@code xmlUTF8_NOBOM.xml} without setting a name or encoding
     * on the context and checks that the expected fragments are present in
     * the extracted text.
     */
    @Test
    public void testUTF8NoBom() throws IOException {
        UnicodeTextExtractor extractor = new UnicodeTextExtractor(1, Integer.MAX_VALUE);

        TextExtractorContext context = new TextExtractorContextImpl();

        extractor.extract(readDocument("xmlUTF8_NOBOM.xml"), context, handler);

        assertEquals(1, textParts.size());

        ExtractedPart part = textParts.get(0);

        assertNull(part.getContext().getName());

        String text = IOUtils.toString(part.getContent(), CharEncoding.UTF_8);

        assertTrue(text.contains("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"));
        assertTrue(text.contains("This does not work due to a missing BOM marker"));
        assertTrue(text.contains("French: "));
        assertTrue(text.contains("Units: ??"));
    }

    /**
     * Extracts {@code xmlUTF8_NOBOM.xml} with the context encoding explicitly
     * set to US-ASCII and checks which fragments are present in the result.
     */
    @Test
    public void testExplicitEncoding() throws IOException {
        UnicodeTextExtractor extractor = new UnicodeTextExtractor(1, Integer.MAX_VALUE);

        TextExtractorContext context = new TextExtractorContextImpl();

        context.setEncoding("US-ASCII");

        extractor.extract(readDocument("xmlUTF8_NOBOM.xml"), context, handler);

        assertEquals(1, textParts.size());

        ExtractedPart part = textParts.get(0);

        assertNull(part.getContext().getName());

        String text = IOUtils.toString(part.getContent(), CharEncoding.UTF_8);

        assertTrue(text.contains("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"));
        assertTrue(text.contains("This does not work due to a missing BOM marker"));
        // this should not be found because encoding is explicitly set to US-ASCII
        assertFalse(text.contains("Units: ??"));
    }

    /**
     * Extracts {@code big-preamble.html} with an extractor constructed with
     * 10 as its second argument (the other tests pass
     * {@code Integer.MAX_VALUE}) and expects a {@link LimitReachedException}.
     */
    @Test(expected = LimitReachedException.class)
    public void testLimitExceeded() throws IOException {
        UnicodeTextExtractor extractor = new UnicodeTextExtractor(1, 10);

        TextExtractorContext context = new TextExtractorContextImpl();

        extractor.extract(readDocument("big-preamble.html"), context, handler);
    }
}