Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.any23.extractor.microdata; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.PrintStream; import java.text.ParseException; import java.util.ArrayList; import java.util.Calendar; import java.util.Date; import java.util.GregorianCalendar; import java.util.List; import java.util.Properties; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.any23.extractor.html.TagSoupParser; import org.apache.any23.util.StreamUtils; import org.apache.commons.io.IOUtils; import org.junit.Assert; import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; import org.junit.rules.Timeout; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; import static org.junit.Assert.assertFalse; /** * Test case for {@link MicrodataParser}. * * @author Michele Mostarda (mostarda@fbk.eu) */ public class MicrodataParserTest { @Rule public final Timeout timeout = new Timeout(100, TimeUnit.SECONDS); private static final Logger logger = LoggerFactory.getLogger(MicrodataParserTest.class); @Ignore("TODO: Determine the cause of this") @Test public void testBasicFeatures() throws IOException { extractItemsAndVerifyJSONSerialization("microdata-basic", "microdata-basic-expected"); } @Ignore("TODO: Determine the cause of this") @Test public void testNestedMicrodata() throws IOException { extractItemsAndVerifyJSONSerialization("microdata-nested", "microdata-nested-expected"); } @Ignore("TODO: Determine the cause of this") @Test public void testAdvancedItemrefManagement() throws IOException { extractItemsAndVerifyJSONSerialization("microdata-itemref", "microdata-itemref-expected"); } @Ignore("TODO: Determine the cause of this") @Test public void testMicrodataJSONSerialization() throws IOException { final Document document = getMicrodataDom("microdata-nested"); final ByteArrayOutputStream baos = new ByteArrayOutputStream(); final PrintStream ps = new PrintStream(baos); MicrodataParser.getMicrodataAsJSON(document, ps); ps.flush(); final String expected = StreamUtils .asString(this.getClass().getResourceAsStream("/microdata/microdata-json-serialization.json")); Assert.assertEquals("Unexpected serialization for Microdata file.", expected, baos.toString()); } @Test public void testGetContentAsDate() throws IOException, ParseException { final ItemScope target = extractItems("microdata-basic").getDetectedItemScopes()[4]; final GregorianCalendar gregorianCalendar = new GregorianCalendar(2009, GregorianCalendar.MAY, 10); // 2009-05-10 Assert.assertEquals(gregorianCalendar.getTime(), target.getProperties().get("birthday").get(0).getValue().getAsDate()); } @Test public void testGetDateConcurrent() throws Exception { final Date expectedDate = new GregorianCalendar(2009, Calendar.MAY, 10).getTime(); // 2009-05-10 final byte[] content = IOUtils .toByteArray(getClass().getResourceAsStream("/microdata/microdata-basic.html")); final int threadCount = 10; final int attemptCount = 100; final List<Thread> threads = new ArrayList<Thread>(); final CountDownLatch beforeLatch = new CountDownLatch(1); final CountDownLatch afterLatch = new CountDownLatch(threadCount); final AtomicBoolean foundFailure = new AtomicBoolean(false); for (int i = 0; i < threadCount; i++) { threads.add(new Thread("Test-thread-" + i) { @Override public void run() { try { beforeLatch.await(); int counter = 0; while (counter++ < attemptCount && !foundFailure.get()) { final Document document = getDom(content); final MicrodataParserReport report = MicrodataParser.getMicrodata(document); final ItemScope target = report.getDetectedItemScopes()[4]; Date actualDate = target.getProperties().get("birthday").get(0).getValue().getAsDate(); if (!expectedDate.equals(actualDate)) { foundFailure.set(true); } } } catch (Exception ex) { ex.printStackTrace(); foundFailure.set(true); } finally { afterLatch.countDown(); } } }); } for (Thread thread : threads) { thread.start(); } // Let threads start computation beforeLatch.countDown(); // Wait for all threads to complete afterLatch.await(); assertFalse(foundFailure.get()); } /** * Test the main use case of {@link MicrodataParser#deferProperties(String...)} * * @throws IOException * @throws MicrodataParserException */ @Test public void testDeferProperties() throws IOException, MicrodataParserException { final Document document = getMicrodataDom("microdata-itemref"); final MicrodataParser parser = new MicrodataParser(document); final ItemProp[] deferred = parser.deferProperties("ip5", "ip4", "ip3", "unexisting"); Assert.assertEquals(3, deferred.length); } /** * Tests the loop detection in {@link MicrodataParser#deferProperties(String...)}. * * @throws IOException * @throws MicrodataParserException */ @Test(expected = MicrodataParserException.class) public void testDeferPropertiesLoopDetection1() throws IOException, MicrodataParserException { final Document document = getMicrodataDom("microdata-itemref"); final MicrodataParser parser = new MicrodataParser(document); parser.setErrorMode(MicrodataParser.ErrorMode.StopAtFirstError); parser.deferProperties("loop0"); } /** * Tests the deep loop detection in {@link MicrodataParser#deferProperties(String...)}. * * @throws IOException * @throws MicrodataParserException */ @Test(expected = MicrodataParserException.class) public void testDeferPropertiesLoopDetection2() throws IOException, MicrodataParserException { final Document document = getMicrodataDom("microdata-itemref"); final MicrodataParser parser = new MicrodataParser(document); parser.setErrorMode(MicrodataParser.ErrorMode.StopAtFirstError); parser.deferProperties("loop2"); } /** * Tests that the loop detection works property even with multiple calls * of {@link MicrodataParser#deferProperties(String...)} over the same item props. * * @throws java.io.IOException * @throws MicrodataParserException */ @Test public void testDeferPropertiesStateManagement() throws IOException, MicrodataParserException { final Document document = getMicrodataDom("microdata-itemref"); final MicrodataParser parser = new MicrodataParser(document); Assert.assertEquals(1, parser.deferProperties("ip1").length); Assert.assertEquals(1, parser.deferProperties("ip1").length); Assert.assertEquals(1, parser.deferProperties("ip1").length); } private Document getDom(String document) throws IOException { final InputStream is = this.getClass().getResourceAsStream(document); try { final TagSoupParser tagSoupParser = new TagSoupParser(is, "http://test-document"); return tagSoupParser.getDOM(); } finally { is.close(); } } private Document getDom(byte[] document) throws IOException { final InputStream is = new ByteArrayInputStream(document); try { final TagSoupParser tagSoupParser = new TagSoupParser(is, "http://test-document"); return tagSoupParser.getDOM(); } finally { is.close(); } } private Document getMicrodataDom(String htmlFile) throws IOException { return getDom("/microdata/" + htmlFile + ".html"); } private MicrodataParserReport extractItems(String htmlFile) throws IOException { final Document document = getMicrodataDom(htmlFile); return MicrodataParser.getMicrodata(document); } private void extractItemsAndVerifyJSONSerialization(String htmlFile, String expectedResult) throws IOException { final MicrodataParserReport report = extractItems(htmlFile); final ItemScope[] items = report.getDetectedItemScopes(); final MicrodataParserException[] errors = report.getErrors(); logger.debug("begin itemScopes"); for (ItemScope item : items) { logger.debug(item.toJSON()); } logger.debug("end itemScopes"); logger.debug("begin errors"); for (MicrodataParserException error : errors) { logger.debug(error.toJSON()); } logger.debug("end errors"); final Properties resultContent = new Properties(); resultContent.load(this.getClass().getResourceAsStream("/microdata/" + expectedResult + ".properties")); final int expectedResults = getExpectedResultCount(resultContent); final int expectedErrors = getExpectedErrorsCount(resultContent); Assert.assertEquals("Unexpected number of detect items.", expectedResults, items.length); Assert.assertEquals("Unexpected number of errors.", expectedErrors, errors.length); for (int i = 0; i < items.length; i++) { Assert.assertEquals("Error while comparing result [" + i + "]", resultContent.getProperty("result" + i), items[i].toJSON()); } for (int i = 0; i < errors.length; i++) { Assert.assertEquals("Error while comparing error [" + i + "]", resultContent.getProperty("error" + i), errors[i].toJSON()); } } private int countKeysWithPrefix(Properties properties, String prefix) { int count = 0; for (Object key : properties.keySet()) { if (key.toString().indexOf(prefix) == 0) count++; } return count; } private int getExpectedResultCount(Properties properties) { return countKeysWithPrefix(properties, "result"); } private int getExpectedErrorsCount(Properties properties) { return countKeysWithPrefix(properties, "error"); } }