org.sweble.wikitext.engine.MassExpansionTest.java Source code

Java tutorial

Introduction

Here is the source code for org.sweble.wikitext.engine.MassExpansionTest.java

Source

/**
 * Copyright 2011 The Open Source Research Group,
 *                University of Erlangen-Nürnberg
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.sweble.wikitext.engine;

import static org.junit.Assert.*;

import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collections;
import java.util.Comparator;
import java.util.Enumeration;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.sweble.wikitext.engine.config.WikiConfigImpl;
import org.sweble.wikitext.engine.config.WikiRuntimeInfo;
import org.sweble.wikitext.engine.nodes.EngProcessedPage;
import org.sweble.wikitext.engine.utils.DefaultConfigEnWp;
import org.sweble.wikitext.engine.utils.NoTransparentRtDataPrinter;
import org.sweble.wikitext.parser.parser.LinkTargetException;

import de.fau.cs.osr.utils.NamedParametrizedSuites;
import de.fau.cs.osr.utils.NamedParametrizedSuites.NamedParametrizedSuite;
import de.fau.cs.osr.utils.NamedParametrizedSuites.Suites;
import de.fau.cs.osr.utils.StringUtils;
import de.fau.cs.osr.utils.TestResourcesFixture;

@RunWith(value = NamedParametrizedSuites.class)
public class MassExpansionTest {
    /**
     * Resource name (resolved with a leading "/") of the directory whose
     * sub-directories and ZIP archives each form one test suite.
     */
    private static final String TESTS_DIR = "engine/mass-expansion";

    // =========================================================================

    /**
     * File name of a stored file URL. Group 1 is the encoded page name, groups
     * 2 and 3 are parsed as the (possibly negative) width and height.
     *
     * NOTE: the '.' in front of "txt" is not escaped and therefore matches any
     * character; callers filter for names ending in ".txt" first.
     */
    private static final Pattern FILE_URL_RX = Pattern.compile("fileUrl-(.*?)-(-?\\d+)-(-?\\d+).txt");

    /**
     * File name of a stored article. Group 1 is the encoded page name, group 2
     * is parsed as the revision.
     */
    private static final Pattern ARTICLE_RX = Pattern.compile("retrieveWikitext-(.*?)-(\\d+).wikitext");

    /**
     * File name of a test case inside a ZIP archive. Group 1 (the name without
     * its last four characters) becomes the test case title.
     */
    private static final Pattern TEST_RX = Pattern.compile("(.*?).txt");

    // =========================================================================

    /**
     * Collects one suite for every sub-directory and every ZIP archive found
     * directly inside the {@code TESTS_DIR} resource directory.
     *
     * @return all suites, sorted according to their natural ordering.
     * @throws Exception
     *             if the resource directory cannot be listed or one of the
     *             suites cannot be read.
     */
    @Suites
    public static List<NamedParametrizedSuite> enumerateSuites() throws Exception {
        File dir = TestResourcesFixture.resourceNameToFile(MassExpansionTest.class, "/" + TESTS_DIR);

        String[] testSetDirs = dir.list(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                return new File(dir, name).isDirectory();
            }
        });

        String[] testSetZips = dir.list(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                return hasZipExtension(name);
            }
        });

        // File.list() answers null instead of throwing when the directory
        // cannot be listed; fail with a readable message instead of an NPE.
        if (testSetDirs == null || testSetZips == null)
            throw new IOException("Cannot list test suite directory: " + dir);

        List<NamedParametrizedSuite> suites = new ArrayList<NamedParametrizedSuite>();

        for (String testDir : testSetDirs)
            suites.add(enumerateSuiteTestCases(new File(dir, testDir)));

        for (String testZip : testSetZips)
            suites.add(enumerateSuiteTestCasesFromZip(new File(dir, testZip)));

        Collections.sort(suites);

        return suites;
    }

    /**
     * Tells whether a file name ends in ".zip", ignoring case.
     *
     * The comparison uses {@link Locale#ROOT}: with the default locale the
     * result depends on the machine (a Turkish locale lower-cases "I" to a
     * dotless i, so "FOO.ZIP" would not be recognized).
     */
    private static boolean hasZipExtension(String name) {
        return name.toLowerCase(Locale.ROOT).endsWith(".zip");
    }

    /**
     * Builds a suite from a ZIP archive.
     *
     * With {@code <suite>} being the archive's file name without ".zip", the
     * following entries are recognized; everything else is reported on
     * {@code System.err} and ignored:
     * <ul>
     * <li>{@code <suite>/resources/fileUrl-*.txt}: file URLs,</li>
     * <li>{@code <suite>/resources/retrieveWikitext-*.wikitext}: articles,</li>
     * <li>{@code <suite>/tests/*.txt}: test cases.</li>
     * </ul>
     *
     * @param zipFile
     *            the archive; its name must end in ".zip" (any case).
     * @return the suite, named after the archive's file name, with its test
     *         cases sorted by title.
     * @throws IOException
     *             if the archive cannot be read.
     * @throws IllegalArgumentException
     *             if the file is not named "*.zip", an entry is too big or a
     *             recognized entry has a malformed name.
     */
    private static NamedParametrizedSuite enumerateSuiteTestCasesFromZip(File zipFile) throws IOException {
        String zfname = zipFile.getName();
        if (!hasZipExtension(zfname))
            throw new IllegalArgumentException("Not a ZIP archive: " + zipFile);
        String suiteName = zfname.substring(0, zfname.length() - 4);

        String testPrefix = suiteName + "/tests/";
        String resourcesPrefix = suiteName + "/resources/";

        // Every test case gets a reference to these two maps. They are shared,
        // so resources that show up in the archive after a test case are still
        // visible to that test case.
        Map<FileUrlKey, String> fileUrls = new HashMap<FileUrlKey, String>();

        Map<String, ArticleDesc> articles = new HashMap<String, ArticleDesc>();

        List<Object[]> testCases = new ArrayList<Object[]>();

        ZipFile zf = new ZipFile(zipFile);
        try {
            Enumeration<? extends ZipEntry> entries = zf.entries();

            while (entries.hasMoreElements()) {
                ZipEntry ze = entries.nextElement();

                // Skip directories
                if (ze.getName().endsWith("/"))
                    continue;

                long longSize = ze.getSize();
                if (longSize > Integer.MAX_VALUE)
                    throw new IllegalArgumentException("Archives contains files too big to process!");

                byte[] content;
                InputStream is = zf.getInputStream(ze);
                try {
                    content = IOUtils.toByteArray(is);
                } finally {
                    // Release the entry stream even if reading fails
                    is.close();
                }

                String filename = ze.getName();
                if (filename.startsWith(resourcesPrefix + "fileUrl-") && filename.endsWith(".txt")) {
                    filename = filename.substring(resourcesPrefix.length());
                    Matcher m = FILE_URL_RX.matcher(filename);
                    if (!m.matches())
                        throw new IllegalArgumentException("Wrong 'fileUrl' pattern: " + ze.getName());

                    String encName = m.group(1);
                    int width = Integer.parseInt(m.group(2));
                    int height = Integer.parseInt(m.group(3));

                    FileUrlKey key = new FileUrlKey(encName, width, height);
                    fileUrls.put(key, new String(content, "UTF8"));
                } else if (filename.startsWith(resourcesPrefix + "retrieveWikitext-")
                        && filename.endsWith(".wikitext")) {
                    filename = filename.substring(resourcesPrefix.length());
                    Matcher m = ARTICLE_RX.matcher(filename);
                    if (!m.matches())
                        throw new IllegalArgumentException("Wrong 'retrieveWikitext' pattern: " + ze.getName());

                    String encName = m.group(1);
                    long revision = Long.parseLong(m.group(2));

                    ArticleDesc article = new ArticleDesc(revision, new String(content, "UTF8"));

                    articles.put(encName, article);
                } else if (filename.startsWith(testPrefix) && filename.endsWith(".txt")) {
                    filename = filename.substring(testPrefix.length());
                    Matcher m = TEST_RX.matcher(filename);
                    if (!m.matches())
                        throw new IllegalArgumentException("Invalid test case filename: " + ze.getName());

                    String title = m.group(1);
                    String test = new String(content, "UTF8");

                    testCases.add(new Object[] { title, fileUrls, articles, test });
                } else {
                    System.err.println("Ignored file in " + zipFile + ": " + ze.getName());
                }
            }
        } finally {
            zf.close();
        }

        // Archive order is arbitrary; order the test cases by title
        Collections.sort(testCases, new Comparator<Object[]>() {
            @Override
            public int compare(Object[] o1, Object[] o2) {
                return ((String) o1[0]).compareTo((String) o2[0]);
            }
        });

        return new NamedParametrizedSuite(zipFile.getName(), MassExpansionTest.class.getSimpleName(),
                testCases);
    }

    /**
     * Builds a suite from a directory that contains a "resources" and a "tests"
     * sub-directory.
     *
     * "resources" provides the file URLs ("fileUrl-*.txt") and articles
     * ("retrieveWikitext-*.wikitext") shared by all test cases of the suite;
     * every "*.txt" file in "tests" becomes one test case, in sorted file name
     * order. A missing sub-directory is treated as empty, like an archive
     * without such entries in the ZIP based variant.
     *
     * @param dir
     *            the suite directory.
     * @return the suite, named after the directory.
     * @throws IOException
     *             if a directory cannot be listed or a file cannot be read.
     * @throws IllegalArgumentException
     *             if a resource file has a malformed name.
     */
    private static NamedParametrizedSuite enumerateSuiteTestCases(File dir) throws IOException {
        Map<FileUrlKey, String> fileUrls = new HashMap<FileUrlKey, String>();

        Map<String, ArticleDesc> articles = new HashMap<String, ArticleDesc>();

        File resourcesDir = new File(dir, "resources");
        File testCasesDir = new File(dir, "tests");

        String[] fileUrlFiles = listNames(resourcesDir, new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                return name.startsWith("fileUrl-") && name.endsWith(".txt");
            }
        });

        String[] articleFiles = listNames(resourcesDir, new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                return name.startsWith("retrieveWikitext-") && name.endsWith(".wikitext");
            }
        });

        for (String fileUrlFilename : fileUrlFiles) {
            Matcher m = FILE_URL_RX.matcher(fileUrlFilename);
            if (!m.matches())
                throw new IllegalArgumentException("Wrong 'fileUrl' pattern: " + fileUrlFilename);

            String encName = m.group(1);
            int width = Integer.parseInt(m.group(2));
            int height = Integer.parseInt(m.group(3));

            File fileUrlFile = new File(resourcesDir, fileUrlFilename);
            String url = FileUtils.readFileToString(fileUrlFile, "UTF8");

            FileUrlKey key = new FileUrlKey(encName, width, height);
            fileUrls.put(key, url);
        }

        for (String articleFilename : articleFiles) {
            Matcher m = ARTICLE_RX.matcher(articleFilename);
            if (!m.matches())
                throw new IllegalArgumentException("Wrong 'retrieveWikitext' pattern: " + articleFilename);

            String encName = m.group(1);
            long revision = Long.parseLong(m.group(2));

            File articleFile = new File(resourcesDir, articleFilename);
            String content = FileUtils.readFileToString(articleFile, "UTF8");

            articles.put(encName, new ArticleDesc(revision, content));
        }

        List<Object[]> testCases = new ArrayList<Object[]>();

        String[] testCaseFiles = listNames(testCasesDir, new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                return name.endsWith(".txt");
            }
        });

        // The order returned by File.list() is unspecified
        Arrays.sort(testCaseFiles);

        for (String filename : testCaseFiles) {
            File file = new File(testCasesDir, filename);
            String content = FileUtils.readFileToString(file, "UTF-8");

            testCases.add(new Object[] { filename, fileUrls, articles, content });
        }

        return new NamedParametrizedSuite(dir.getName(), MassExpansionTest.class.getSimpleName(), testCases);
    }

    /**
     * Lists the names in a directory that pass the filter. Unlike
     * {@link File#list(FilenameFilter)} this never returns {@code null}: a
     * directory that does not exist yields an empty array and a directory that
     * cannot be listed raises an exception.
     */
    private static String[] listNames(File dir, FilenameFilter filter) throws IOException {
        if (!dir.isDirectory())
            return new String[0];

        String[] names = dir.list(filter);
        if (names == null)
            throw new IOException("Cannot list directory: " + dir);
        return names;
    }

    // =========================================================================

    /** Default English Wikipedia configuration after it went through fixConfig(). */
    private final WikiConfigImpl config;

    /** Engine under test, created from {@link #config}. */
    private final WtEngineImpl engine;

    /** Name of the test case; only used to identify it in error messages. */
    private final String title;

    /** File URLs of the suite, served by the expansion callback. */
    private final Map<FileUrlKey, String> fileUrls;

    /**
     * Articles of the suite, served by the expansion callback. The callback
     * looks them up by the "safe filename" form of the normalized full title.
     */
    private final Map<String, ArticleDesc> articles;

    /** Complete text of the test case file; split into its sections by buildTest(). */
    private final String inputFileContent;

    // =========================================================================

    public MassExpansionTest(String title, Map<FileUrlKey, String> fileUrls, Map<String, ArticleDesc> articles,
            String inputFileContent) {
        this.config = fixConfig(DefaultConfigEnWp.generate());
        this.engine = new WtEngineImpl(config);

        this.title = title;
        this.fileUrls = fileUrls;
        this.articles = articles;
        this.inputFileContent = inputFileContent;
    }

    // =========================================================================

    /**
     * Pins the parts of the configuration that would otherwise vary between
     * runs: the site name and the current date and time.
     *
     * @param config
     *            the configuration to adjust (modified in place).
     * @return the given configuration.
     */
    private WikiConfigImpl fixConfig(WikiConfigImpl config) {
        config.setSiteName("English Wikipedia");

        config.setRuntimeInfo(new WikiRuntimeInfo() {
            @Override
            public Calendar getDateAndTime(Locale locale) {
                Calendar timestamp = new GregorianCalendar(locale);
                timestamp.setLenient(true);
                timestamp.set(2012, Calendar.OCTOBER, 18, 14, 25, 13);
                // set(year, month, ...) leaves all other fields untouched. The
                // calendar was initialized with the current time, so without
                // this the milliseconds would differ on every call.
                timestamp.set(Calendar.MILLISECOND, 0);
                return timestamp;
            }

            @Override
            public Calendar getDateAndTime() {
                return getDateAndTime(Locale.getDefault());
            }
        });

        return config;
    }

    // =========================================================================

    /**
     * Expands the statement of the test case and compares the printed,
     * polished result with the expected expansion from the test case file.
     */
    @Test
    public void testOurExpansionMatchesReference() throws Exception {
        TestDesc testCase = buildTest();

        ExpansionCallback callback = new MassExpansionCallback();
        PageId pageId = new PageId(testCase.getTitle(), -1);

        // Expand as the page itself, not for inclusion into another page
        EngProcessedPage expanded = engine.expand(pageId, testCase.getStmt(), false, callback);

        String actual = polishRawResult(NoTransparentRtDataPrinter.print(expanded));

        assertEquals(testCase.getExpected(), actual);
    }

    /**
     * Convert the XML char refs for "{", "}", "|" and "=" to chars and convert
     * "<nowiki></nowiki>" to an empty tag.
     *
     * Only hexadecimal references ("&#x" + hex digits + ";") are considered.
     * References to other characters as well as malformed references (no
     * digits, a sign, non-hex characters, too many digits) are copied to the
     * result unchanged.
     *
     * @param raw
     *            the printed expansion result.
     * @return the polished text.
     */
    private String polishRawResult(String raw) {
        final String refStart = "&#x";

        StringBuilder b = new StringBuilder(raw.length());

        int copyFrom = 0;
        int searchFrom = 0;
        int i;
        while ((i = raw.indexOf(refStart, searchFrom)) != -1) {
            int digitsFrom = i + refStart.length();
            searchFrom = digitsFrom;

            // A reference needs a ';' within a few characters
            int j = raw.indexOf(';', digitsFrom);
            if (j == -1 || j > i + 12)
                continue;

            // Integer.parseInt() would accept a leading sign, but a char ref
            // has to start with a hex digit. This also skips the empty "&#x;".
            if (Character.digit(raw.charAt(digitsFrom), 0x10) < 0)
                continue;

            int ch;
            try {
                ch = Integer.parseInt(raw.substring(digitsFrom, j), 0x10);
            } catch (NumberFormatException e) {
                // Not a (representable) hex number: leave the text as it is
                continue;
            }

            switch (ch) {
            case 0x7B: // {
            case 0x7D: // }
            case 0x7C: // |
            case 0x3D: // =
                break;
            default:
                continue;
            }
            b.append(raw, copyFrom, i);
            b.append((char) ch);

            searchFrom = copyFrom = j + 1;
        }
        b.append(raw, copyFrom, raw.length());

        return b.toString().replace("<nowiki></nowiki>", "<nowiki />");
    }

    // =========================================================================

    /**
     * Splits the test case file into its three sections.
     *
     * The file has to start with the TITLE delimiter line, followed by the
     * title, the STATEMENT delimiter, the statement, the EXPECTED EXPANSION
     * delimiter, the expected expansion and finally the END delimiter.
     *
     * @return title, statement and expected expansion of the test case.
     * @throws IllegalArgumentException
     *             if one of the delimiters is missing or misplaced.
     */
    private TestDesc buildTest() throws IOException, LinkTargetException {
        final String titleDelim = "==[ TITLE ]=====================================================================\n";
        final String stmtDelim = "\n==[ STATEMENT ]=================================================================\n";
        final String expectedDelim = "\n==[ EXPECTED EXPANSION ]========================================================\n";
        final String endDelim = "\n==[ END ]=======================================================================\n";

        final String text = inputFileContent;

        // The title delimiter has to be the very first thing in the file
        if (!text.startsWith(titleDelim))
            wrongArticleFormat();
        final int titleStart = titleDelim.length();

        final int titleEnd = text.indexOf(stmtDelim, titleStart);
        if (titleEnd == -1)
            wrongArticleFormat();
        final int stmtStart = titleEnd + stmtDelim.length();

        final int stmtEnd = text.indexOf(expectedDelim, stmtStart);
        if (stmtEnd == -1)
            wrongArticleFormat();
        final int expectedStart = stmtEnd + expectedDelim.length();

        final int expectedEnd = text.indexOf(endDelim, expectedStart);
        if (expectedEnd == -1)
            wrongArticleFormat();

        PageTitle pageTitle = PageTitle.make(config, text.substring(titleStart, titleEnd));
        return new TestDesc(pageTitle, text.substring(stmtStart, stmtEnd),
                text.substring(expectedStart, expectedEnd));
    }

    /**
     * Reports a malformed test case file. Never returns normally.
     *
     * @throws IllegalArgumentException
     *             always; the message names the test case.
     */
    private void wrongArticleFormat() {
        String message = "Wrong test case file format: " + title;
        throw new IllegalArgumentException(message);
    }

    // =========================================================================

    /**
     * Expansion callback that answers all requests from the articles and file
     * URLs which were handed to the enclosing test case.
     */
    public final class MassExpansionCallback implements ExpansionCallback {

        /**
         * Looks up the article stored for the given title.
         *
         * @return the page, or {@code null} if no such article is stored.
         */
        @Override
        public FullPage retrieveWikitext(ExpansionFrame expansionFrame, PageTitle pageTitle) throws Exception {
            ArticleDesc stored = articles.get(encodedName(pageTitle));
            if (stored != null)
                return new FullPage(new PageId(pageTitle, stored.getRevision()), stored.getContent());

            return null;
        }

        /**
         * Looks up the URL stored for the given title and dimensions.
         *
         * @return the URL, or {@code null} if none is stored.
         */
        @Override
        public String fileUrl(PageTitle pageTitle, int width, int height) throws Exception {
            return fileUrls.get(new FileUrlKey(encodedName(pageTitle), width, height));
        }

        /** Converts a title into the form used as key of the resource maps. */
        private String encodedName(PageTitle pageTitle) {
            return StringUtils.safeFilename(pageTitle.getNormalizedFullTitle());
        }

    }

    // =========================================================================

    /**
     * The three sections of a test case file: page title, statement to expand
     * and expected expansion.
     */
    public static final class TestDesc {
        private final PageTitle title;

        private final String stmt;

        private final String expected;

        /**
         * @param title
         *            title of the page the statement is expanded for.
         * @param stmt
         *            the statement to expand.
         * @param expected
         *            the expected expansion.
         */
        public TestDesc(PageTitle title, String stmt, String expected) {
            this.expected = expected;
            this.stmt = stmt;
            this.title = title;
        }

        /** @return the expected expansion. */
        public String getExpected() {
            return this.expected;
        }

        /** @return the statement to expand. */
        public String getStmt() {
            return this.stmt;
        }

        /** @return the page title. */
        public PageTitle getTitle() {
            return this.title;
        }
    }

    // =========================================================================

    /**
     * Immutable pair of an article's revision and its content.
     */
    public static final class ArticleDesc {
        private final long revision;

        private final String content;

        /**
         * @param revision
         *            revision of the article.
         * @param content
         *            text of the article.
         */
        public ArticleDesc(long revision, String content) {
            this.content = content;
            this.revision = revision;
        }

        /** @return the text of the article. */
        public String getContent() {
            return this.content;
        }

        /** @return the revision of the article. */
        public long getRevision() {
            return this.revision;
        }

        @Override
        public String toString() {
            StringBuilder text = new StringBuilder();
            text.append("ArticleDesc [revision=").append(revision);
            text.append(", content=").append(content);
            text.append("]");
            return text.toString();
        }
    }

    // =========================================================================

    /**
     * Immutable map key that identifies a file URL by the encoded name of the
     * file and the requested width and height.
     */
    public static final class FileUrlKey {

        // Final: a key must not change while it is stored in a hash map
        private final String encName;

        private final int width;

        private final int height;

        /**
         * @param encName
         *            encoded name of the file, may be {@code null}.
         * @param width
         *            requested width.
         * @param height
         *            requested height.
         */
        public FileUrlKey(String encName, int width, int height) {
            this.encName = encName;
            this.width = width;
            this.height = height;
        }

        /**
         * Combines name, height and width.
         *
         * The dimensions are used directly. The former
         * {@code (int) (x ^ (x >>> 32))} is the folding idiom for {@code long}
         * values; on an {@code int} the shift distance 32 is reduced to 0, the
         * expression is always 0 and the dimensions never reached the hash.
         */
        @Override
        public int hashCode() {
            final int prime = 31;
            int result = 1;
            result = prime * result + ((encName == null) ? 0 : encName.hashCode());
            result = prime * result + height;
            result = prime * result + width;
            return result;
        }

        /**
         * Two keys are equal if they are of the same class and agree in name,
         * width and height.
         */
        @Override
        public boolean equals(Object obj) {
            if (this == obj)
                return true;
            if (obj == null || getClass() != obj.getClass())
                return false;

            FileUrlKey other = (FileUrlKey) obj;
            if (height != other.height || width != other.width)
                return false;

            return (encName == null) ? other.encName == null : encName.equals(other.encName);
        }

        @Override
        public String toString() {
            return "FileUrlKey [encName=" + encName + ", width=" + width + ", height=" + height + "]";
        }
    }
}