de.tudarmstadt.ukp.wikipedia.parser.mediawiki.ModularParser.java Source code

Introduction

Here is the source code for de.tudarmstadt.ukp.wikipedia.parser.mediawiki.ModularParser.java
Source

/*******************************************************************************
 * Copyright (c) 2010 Torsten Zesch. All rights reserved. This program and the
 * accompanying materials are made available under the terms of the GNU Lesser
 * Public License v3 which accompanies this distribution, and is available at
 * http://www.gnu.org/licenses/lgpl.html
 *
 * Contributors:
 *       Torsten Zesch - initial API and implementation
 *        Samy Ateia - provided a patch via the JWPL mailing list
 ******************************************************************************/
package de.tudarmstadt.ukp.wikipedia.parser.mediawiki;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Stack;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import de.tudarmstadt.ukp.wikipedia.parser.*;
import de.tudarmstadt.ukp.wikipedia.parser.Content.FormatType;

/**
 * This is a parser for MediaWiki source.<br/>
 * A MediaWikiParserFactory exists to obtain an instance of this parser.<br/>
 * However, if you want to, you can also create and configure a parser yourself.
 *
 * @author CJacobi
 *
 */
public class ModularParser implements MediaWikiParser, MediaWikiContentElementParser {

    // Logger obtained for the runtime class of this parser instance.
    private final Log logger = LogFactory.getLog(getClass());

    // Options, set by the ParserFactory (or through the setters of this class).
    // Separator appended to the source in parse() and used to split it into lines.
    private String lineSeparator;
    // Lower-cased link namespaces whose links are collected as the page's category element.
    private List<String> categoryIdentifers;
    // Lower-cased link namespaces whose links are collected as the page's languages element.
    private List<String> languageIdentifers;
    // Lower-cased identifiers for image links; presumably namespace prefixes — usage is not
    // visible in this part of the file, TODO confirm.
    private List<String> imageIdentifers;
    // Resolves every {{...}} template found in the source (see parseTemplates).
    private MediaWikiTemplateParser templateParser;
    // The following three flags are documented in MediaWikiParserFactory.
    private boolean showImageText = false;
    private boolean deleteTags = true;
    private boolean showMathTagContent = true;
    // When true, source position calculation is enabled on the SpanManager and SrcSpans are
    // attached to the parsed elements.
    private boolean calculateSrcSpans = true;

    /**
     * Creates an unconfigured parser. The line separator, the three identifier
     * lists and the template parser have to be set through the setters before
     * {@link #parse(String)} can produce a result (see runConfig()).
     */
    public ModularParser() {
    }

    /**
     * Creates a fully configured parser by passing every argument to the
     * corresponding setter of this class.
     *
     * @param lineSeparator      separator used to split the source into lines
     * @param languageIdentifers identifiers of language links (stored lower-cased)
     * @param categoryIdentifers identifiers of category links (stored lower-cased)
     * @param imageIdentifers    identifiers of image links (stored lower-cased)
     * @param showImageText      see MediaWikiParserFactory
     * @param deleteTags         see MediaWikiParserFactory
     * @param showMathTagContent see MediaWikiParserFactory
     * @param calculateSrcSpans  whether source positions are calculated while parsing
     * @param templateParser     parser used to resolve templates
     */
    public ModularParser(String lineSeparator, List<String> languageIdentifers, List<String> categoryIdentifers,
            List<String> imageIdentifers, boolean showImageText, boolean deleteTags, boolean showMathTagContent,
            boolean calculateSrcSpans, MediaWikiTemplateParser templateParser) {

        setLineSeparator(lineSeparator);
        setLanguageIdentifers(languageIdentifers);
        setCategoryIdentifers(categoryIdentifers);
        setImageIdentifers(imageIdentifers);
        setShowImageText(showImageText);
        setDeleteTags(deleteTags);
        setShowMathTagContent(showMathTagContent);
        setCalculateSrcSpans(calculateSrcSpans);
        setTemplateParser(templateParser);
    }

    /**
     * Returns the line separator this parser uses to split the source into
     * lines. See MediaWikiParserFactory for a description of the option.
     *
     * @return the configured line separator, or {@code null} if none was set
     */
    @Override
    public String getLineSeparator() {
        return lineSeparator;
    }

    /**
     * Sets the line separator this parser uses to split the source into
     * lines. See MediaWikiParserFactory for a description of the option.
     *
     * @param lineSeparator the separator to use
     */
    public void setLineSeparator(String lineSeparator) {
        this.lineSeparator = lineSeparator;
    }

    /**
     * Returns the identifiers used to recognize language links. See
     * MediaWikiParserFactory for a description of the option.
     *
     * @return the stored (lower-cased) identifiers, or {@code null} if none were set
     */
    public List<String> getLanguageIdentifers() {
        return languageIdentifers;
    }

    /**
     * Sets the identifiers used to recognize language links. A lower-cased
     * copy of the given list is stored. See MediaWikiParserFactory for a
     * description of the option.
     *
     * @param languageIdentifers the identifiers of language links
     */
    public void setLanguageIdentifers(List<String> languageIdentifers) {
        this.languageIdentifers = listToLowerCase(languageIdentifers);
    }

    /**
     * Returns the identifiers used to recognize category links. See
     * MediaWikiParserFactory for a description of the option.
     *
     * @return the stored (lower-cased) identifiers, or {@code null} if none were set
     */
    public List<String> getCategoryIdentifers() {
        return categoryIdentifers;
    }

    /**
     * Sets the identifiers used to recognize category links. A lower-cased
     * copy of the given list is stored. See MediaWikiParserFactory for a
     * description of the option.
     *
     * @param categoryIdentifers the identifiers of category links
     */
    public void setCategoryIdentifers(List<String> categoryIdentifers) {
        this.categoryIdentifers = listToLowerCase(categoryIdentifers);
    }

    /**
     * Returns the identifiers used to recognize image links. See
     * MediaWikiParserFactory for a description of the option.
     *
     * @return the stored (lower-cased) identifiers, or {@code null} if none were set
     */
    public List<String> getImageIdentifers() {
        return imageIdentifers;
    }

    /**
     * Sets the identifiers used to recognize image links. A lower-cased copy
     * of the given list is stored. See MediaWikiParserFactory for a
     * description of the option.
     *
     * @param imageIdentifers the identifiers of image links
     */
    public void setImageIdentifers(List<String> imageIdentifers) {
        this.imageIdentifers = listToLowerCase(imageIdentifers);
    }

    /**
     * Returns the parser used to resolve templates. See
     * MediaWikiParserFactory for a description of the option.
     *
     * @return the configured template parser, or {@code null} if none was set
     */
    public MediaWikiTemplateParser getTemplateParser() {
        return templateParser;
    }

    /**
     * Sets the parser used to resolve templates. See MediaWikiParserFactory
     * for a description of the option.
     *
     * @param templateParser the template parser to use
     */
    public void setTemplateParser(MediaWikiTemplateParser templateParser) {
        this.templateParser = templateParser;
    }

    /**
     * Returns the current value of the showImageText option. See
     * MediaWikiParserFactory for a description of what the option controls.
     *
     * @return the value of the showImageText flag ({@code false} by default)
     */
    public boolean showImageText() {
        return showImageText;
    }

    /**
     * Sets the showImageText option. See MediaWikiParserFactory for a
     * description of what the option controls.
     *
     * @param showImageText the new value of the flag
     */
    public void setShowImageText(boolean showImageText) {
        this.showImageText = showImageText;
    }

    /**
     * Returns the current value of the deleteTags option. See
     * MediaWikiParserFactory for a description of what the option controls.
     *
     * @return the value of the deleteTags flag ({@code true} by default)
     */
    public boolean deleteTags() {
        return deleteTags;
    }

    /**
     * Sets the deleteTags option. See MediaWikiParserFactory for a
     * description of what the option controls.
     *
     * @param deleteTags the new value of the flag
     */
    public void setDeleteTags(boolean deleteTags) {
        this.deleteTags = deleteTags;
    }

    /**
     * Returns the current value of the showMathTagContent option. See
     * MediaWikiParserFactory for a description of what the option controls.
     *
     * @return the value of the showMathTagContent flag ({@code true} by default)
     */
    public boolean showMathTagContent() {
        return showMathTagContent;
    }

    /**
     * Sets the showMathTagContent option. See MediaWikiParserFactory for a
     * description of what the option controls.
     *
     * @param showMathTagContent the new value of the flag
     */
    public void setShowMathTagContent(boolean showMathTagContent) {
        this.showMathTagContent = showMathTagContent;
    }

    /**
     * Returns whether source positions are calculated while parsing. See
     * MediaWikiParserFactory for a description of the option.
     *
     * @return the value of the calculateSrcSpans flag ({@code true} by default)
     */
    public boolean calculateSrcSpans() {
        return calculateSrcSpans;
    }

    /**
     * Sets whether source positions are calculated while parsing. See
     * MediaWikiParserFactory for a description of the option.
     *
     * @param calculateSrcSpans the new value of the flag
     */
    public void setCalculateSrcSpans(boolean calculateSrcSpans) {
        this.calculateSrcSpans = calculateSrcSpans;
    }

    /**
     * Converts a List of Strings to a new List of lower case Strings.
     * <p>
     * A {@code null} list is passed through as {@code null} instead of causing
     * a NullPointerException, so that an identifier list that was never
     * provided is reported as "not set" by the configuration check rather
     * than failing inside a setter.
     *
     * @param l the strings to convert, may be {@code null}
     * @return a new list holding the lower-cased strings in the same order,
     *         or {@code null} if {@code l} is {@code null}
     */
    private List<String> listToLowerCase(List<String> l) {
        if (l == null) {
            return null;
        }
        List<String> result = new ArrayList<String>(l.size());
        for (String s : l) {
            result.add(s.toLowerCase());
        }
        return result;
    }

    /**
     * Builds a human readable, multi-line description of the current parser
     * configuration. Look at the MediaWikiParser interface for a description.
     * <p>
     * Options that have not been set yet (identifier lists or the template
     * parser being {@code null}) are reported as "(not set)" instead of
     * causing a NullPointerException, so the method can also be used to
     * inspect an incompletely configured parser.
     *
     * @return the configuration description
     */
    @Override
    public String configurationInfo() {
        StringBuilder result = new StringBuilder();

        result.append("MediaWikiParser configuration:\n");
        result.append("ParserClass: ").append(this.getClass()).append("\n");
        result.append("ShowImageText: ").append(showImageText).append("\n");
        result.append("DeleteTags: ").append(deleteTags).append("\n");
        result.append("ShowMathTagContent: ").append(showMathTagContent).append("\n");
        result.append("CalculateSrcSpans: ").append(calculateSrcSpans).append("\n");

        appendIdentifierLine(result, "LanguageIdentifers: ", languageIdentifers);
        appendIdentifierLine(result, "CategoryIdentifers: ", categoryIdentifers);
        appendIdentifierLine(result, "ImageIdentifers: ", imageIdentifers);

        if (templateParser == null) {
            result.append("TemplateParser: (not set)\n");
        } else {
            result.append("TemplateParser: ").append(templateParser.getClass()).append("\n");
            result.append(templateParser.configurationInfo());
        }

        return result.toString();
    }

    /**
     * Appends one "label: id id id \n" line to the given builder; a
     * {@code null} list is rendered as "(not set)".
     */
    private void appendIdentifierLine(StringBuilder target, String label, List<String> identifiers) {
        target.append(label);
        if (identifiers == null) {
            target.append("(not set)");
        } else {
            for (String s : identifiers) {
                target.append(s).append(" ");
            }
        }
        target.append("\n");
    }

    /**
     * Verifies that every mandatory option has been set. The options are
     * checked in a fixed order and only the first missing one is logged.
     *
     * @return {@code true} if the parser can run, {@code false} otherwise
     */
    private boolean runConfig() {
        final String missingOption;
        if (lineSeparator == null) {
            missingOption = "lineSeparator";
        } else if (categoryIdentifers == null) {
            missingOption = "categoryIdentifers";
        } else if (languageIdentifers == null) {
            missingOption = "languageIdentifers";
        } else if (imageIdentifers == null) {
            missingOption = "imageIdentifers";
        } else if (templateParser == null) {
            missingOption = "templateParser";
        } else {
            // everything required is present
            return true;
        }

        logger.error("Set " + missingOption);
        return false;
    }

    /**
     * Parses MediaWiki source into a ParsedPage. Look at the MediaWikiParser
     * interface for a description of the contract.
     * <p>
     * The steps run in a fixed order on one SpanManager: comments and TOC
     * markers are deleted, PRE/NOWIKI and MATH regions are cut out, templates
     * are resolved, the remaining tags, galleries, images and links are
     * processed, category and language links are extracted, and finally the
     * line based section structure is built.
     *
     * @param src the MediaWiki source text
     * @return the parsed page, or {@code null} if the parser is not fully
     *         configured or {@code src} is {@code null} or empty
     */
    @Override
    public ParsedPage parse(String src) {
        // Check whether the configuration is runnable.
        if (!runConfig()) {
            return null;
        }

        // Check whether there is something to parse. Sometimes an empty
        // string arrives here due to an error in other classes...
        if (src == null || src.length() == 0) {
            return null;
        }

        // Create a new span manager for the given source (tabs replaced by
        // blanks), appending a line separator to avoid errors.
        SpanManager sm = new SpanManager(src.replace('\t', ' ') + lineSeparator);
        if (calculateSrcSpans) {
            sm.enableSrcPosCalculation();
        }

        // Create a new ParsedPage, which will be filled with information
        // during the parsing process.
        ParsedPage ppResult = new ParsedPage();

        // Create a new parameter container.
        ContentElementParsingParameters cepp = new ContentElementParsingParameters();

        // Delete comments from the source.
        deleteComments(sm);

        // Delete any TOC tags; these are not used by this parser.
        deleteTOCTag(sm);

        // Remove the content which should not be parsed but is integrated
        // later into the resulting text. The span list is only kept managed
        // if something was found.
        sm.manageList(cepp.noWikiSpans);
        parseSpecifiedTag(sm, cepp.noWikiSpans, cepp.noWikiStrings, "PRE", " ");
        parseSpecifiedTag(sm, cepp.noWikiSpans, cepp.noWikiStrings, "NOWIKI");
        if (cepp.noWikiSpans.size() == 0) {
            sm.removeManagedList(cepp.noWikiSpans);
        }

        // Parse the math tags in the same way.
        sm.manageList(cepp.mathSpans);
        parseSpecifiedTag(sm, cepp.mathSpans, cepp.mathStrings, "MATH");
        if (cepp.mathSpans.size() == 0) {
            sm.removeManagedList(cepp.mathSpans);
        }

        // Parse the templates (the span list will be added to the managed
        // lists by the function).
        parseTemplates(sm, cepp.templateSpans, cepp.templates, ppResult);

        // Parse all other tags.
        parseTags(sm, cepp.tagSpans);

        // Convert &lt;gallery&gt;s to normal images; this is not beautiful, but
        // a simple solution.
        convertGalleriesToImages(sm, cepp.tagSpans);

        // Parse links and images.
        parseImagesAndInternalLinks(sm, cepp.linkSpans, cepp.links);

        // Create a list of line spans to work with lines in the following
        // functions.
        LinkedList<Span> lineSpans = new LinkedList<Span>();
        getLineSpans(sm, lineSpans);

        // Remove the category links from the links list and create a
        // ContentElement for these links.
        ppResult.setCategoryElement(getSpecialLinks(sm, cepp.linkSpans, cepp.links, " - ", categoryIdentifers));

        // Remove the language links from the links list and create a
        // ContentElement for these links.
        ppResult.setLanguagesElement(getSpecialLinks(sm, cepp.linkSpans, cepp.links, " - ", languageIdentifers));

        // Parse and set the sections; the main work is done in
        // parseSections.
        ppResult.setSections(EmptyStructureRemover.eliminateEmptyStructures(parseSections(sm, cepp, lineSpans)));

        // Find and set the paragraph which is considered the "first" one.
        setFirstParagraph(ppResult);

        // Check the calculated source positions and reset them if necessary.
        if (calculateSrcSpans) {
            SrcPosRangeChecker.checkRange(ppResult);
        }

        // Done.
        return ppResult;
    }

    /**
     * Deletes all comments from the SpanManager.<br/>
     * &lt;!-- COMMENT --&gt;
     * <p>
     * A comment without a closing marker extends to the end of the source. If
     * a comment is directly enclosed by line separators (the whole line is a
     * comment), the line separator following it is removed as well.
     *
     * @param sm the SpanManager holding the source; modified in place
     */
    private void deleteComments(SpanManager sm) {
        final int sepLen = lineSeparator.length();

        int start = 0;
        while ((start = sm.indexOf("<!--", start)) != -1) {
            int close = sm.indexOf("-->", start + 4);
            int end = (close == -1) ? sm.length() : close + 3;

            // Remove one lineSeparator too, if the whole line is a comment.
            // The range checks replace the former catch of
            // IndexOutOfBoundsException: at the very beginning or end of the
            // source there is no room for a surrounding separator.
            boolean separatorFitsBefore = start >= sepLen;
            boolean separatorFitsAfter = end + sepLen <= sm.length();
            if (separatorFitsBefore && separatorFitsAfter
                    && lineSeparator.equals(sm.substring(start - sepLen, start))
                    && lineSeparator.equals(sm.substring(end, end + sepLen))) {
                end += sepLen;
            }

            sm.delete(start, end);
        }
    }

    /**
     * Removes every occurrence of the magic words __TOC__ and __NOTOC__ from
     * the source; all __TOC__ occurrences are removed first.
     *
     * @param sm the SpanManager holding the source; modified in place
     */
    private void deleteTOCTag(SpanManager sm) {
        final String[] magicWords = { "__TOC__", "__NOTOC__" };

        for (String word : magicWords) {
            final int wordLength = word.length();
            int pos = sm.indexOf(word, 0);
            while (pos != -1) {
                sm.delete(pos, pos + wordLength);
                // continue searching at the position of the removed word
                pos = sm.indexOf(word, pos);
            }
        }
    }

    /**
     * Moves all links whose namespace is one of the given identifiers out of
     * the link lists and out of the source, and collects them in a new
     * ContentElement. The links are visited from the last to the first one;
     * their texts are joined with {@code linkSpacer}.
     *
     * @param sm         the SpanManager holding the source; matching link texts are deleted from it
     * @param linkSpans  spans of the links, index-aligned with {@code links}; matching entries are removed
     * @param links      all links found so far; matching entries are removed
     * @param linkSpacer separator placed between the collected link texts
     * @param identifers namespaces that mark a link as "special"
     * @return the element holding the collected links, or {@code null} if it is empty
     */
    private ContentElement getSpecialLinks(SpanManager sm, List<Span> linkSpans, List<Link> links,
            String linkSpacer, List<String> identifers) {
        final ContentElement special = new ContentElement();
        final StringBuilder joined = new StringBuilder();
        final List<Link> collected = new ArrayList<Link>();

        for (int idx = links.size() - 1; idx >= 0; idx--) {
            final String nameSpace = getLinkNameSpace(links.get(idx).getTarget());
            if (nameSpace == null || !identifers.contains(nameSpace)) {
                continue;
            }

            final Link link = links.remove(idx);
            final Span span = linkSpans.remove(idx);
            final String linkText = sm.substring(span);
            sm.delete(span);
            link.setHomeElement(special);
            // relocate the span so that it points into the collected text
            span.adjust(-span.getStart() + joined.length());
            joined.append(linkText).append(linkSpacer);
            collected.add(link);
            //TODO add type?
        }

        // drop the spacer that follows the last collected link
        if (joined.length() != 0) {
            joined.setLength(joined.length() - linkSpacer.length());
        }

        special.setText(joined.toString());
        special.setLinks(collected);

        return special.empty() ? null : special;
    }

    /**
     * Splits the source at every line separator and stores one span per line
     * (trailing characters trimmed via trimTrail) in {@code lineSpans}. The
     * list is registered as a managed list of the SpanManager, and empty
     * spans at the beginning and at the end of the list are dropped.
     *
     * @param sm        the SpanManager holding the source
     * @param lineSpans the list receiving the line spans
     */
    private void getLineSpans(SpanManager sm, LinkedList<Span> lineSpans) {
        sm.manageList(lineSpans);

        final int separatorLength = lineSeparator.length();
        int lineStart = 0;
        for (int lineEnd = sm.indexOf(lineSeparator, lineStart); lineEnd != -1;
                lineEnd = sm.indexOf(lineSeparator, lineStart)) {
            lineSpans.add(new Span(lineStart, lineEnd).trimTrail(sm));
            lineStart = lineEnd + separatorLength;
        }
        // whatever follows the last separator forms the final line
        lineSpans.add(new Span(lineStart, sm.length()).trimTrail(sm));

        // strip empty lines at the head ...
        for (Span head = lineSpans.peekFirst(); head != null && head.length() == 0; head = lineSpans.peekFirst()) {
            lineSpans.pollFirst();
        }
        // ... and at the tail
        for (Span tail = lineSpans.peekLast(); tail != null && tail.length() == 0; tail = lineSpans.peekLast()) {
            lineSpans.pollLast();
        }
    }

    /**
     * Consumes the given line spans and builds the section structure from
     * them. Every line is classified with getLineType and handed to the
     * matching build function; heading lines start a new SectionContent.
     * <p>
     * An empty {@code lineSpans} list (for example a page consisting only of
     * whitespace or comments) is handled without an exception: the initial
     * section then gets the source position 0.
     *
     * @param sm        the SpanManager holding the source
     * @param cepp      the parameters needed to parse content elements
     * @param lineSpans the lines to process; emptied by this method
     * @return the section structure built by buildSectionStructure
     */
    private SectionContainer parseSections(SpanManager sm, ContentElementParsingParameters cepp,
            LinkedList<Span> lineSpans) {

        List<SectionContent> contentSections = new ArrayList<SectionContent>();

        SectionContent sc = new SectionContent(1);

        if (calculateSrcSpans) {
            if (lineSpans.isEmpty()) {
                // No first line to take the position from; getFirst() would
                // throw a NoSuchElementException here.
                sc.setSrcSpan(new SrcSpan(0, -1));
            } else {
                sc.setSrcSpan(new SrcSpan(sm.getSrcPos(lineSpans.getFirst().getStart()), -1));
            }
        }

        // Identify the line type and call the necessary function for the
        // further handling...
        while (!lineSpans.isEmpty()) {

            Span s = lineSpans.getFirst();

            lineType t = getLineType(sm, s);
            switch (t) {
            case SECTION:
                // A heading closes the current section and opens a new one.
                contentSections.add(sc);
                int level = getSectionLevel(sm, s);
                sc = new SectionContent(
                        parseContentElement(sm, cepp, new Span(s.getStart() + level, s.getEnd() - level).trim(sm)),
                        level);
                lineSpans.removeFirst();

                if (calculateSrcSpans) {
                    sc.setSrcSpan(new SrcSpan(sm.getSrcPos(s.getStart()), -1));
                }

                break;

            case HR:
                // remove the HR (----) and handle the rest as a paragraph line
                removeHr(sm, s);
                t = lineType.PARAGRAPH;
                // intentional fall through to the paragraph handling
            case PARAGRAPH:
            case PARAGRAPH_BOXED:
            case PARAGRAPH_INDENTED:
                sc.addParagraph(buildParagraph(sm, cepp, lineSpans, t));
                break;

            case NESTEDLIST:
            case NESTEDLIST_NR:
                sc.addNestedList(buildNestedList(sm, cepp, lineSpans, t));
                break;

            case DEFINITIONLIST:
                sc.addDefinitionList(buildDefinitionList(sm, cepp, lineSpans));
                break;

            case TABLE:
                sc.addTable(buildTable(sm, cepp, lineSpans));
                break;

            case EMPTYLINE:
                lineSpans.removeFirst();
                break;

            default:
                logger.error("unknown lineStart!: \"" + sm.substring(s) + "\"");
                lineSpans.removeFirst();
            }
        }

        // add the remaining Section to the list.
        contentSections.add(sc);

        return buildSectionStructure(contentSections);
    }

    /**
     * Strips the leading '-' characters of a horizontal rule line from the
     * given span and trims the remainder.
     *
     * @param sm the SpanManager holding the source
     * @param s  the span of the line; its start is moved behind the rule
     * @return the span as returned by {@code s.setStart(...).trim(sm)}
     */
    private Span removeHr(SpanManager sm, Span s) {
        int start = s.getStart();
        final int end = s.getEnd();
        // Check the bound first, so that no character at or behind the end of
        // the span (or of the whole source) is read.
        while (start < end && sm.charAt(start) == '-') {
            start++;
        }
        return s.setStart(start).trim(sm);
    }

    /**
     * The line types which are possible. getLineType assigns one of them to
     * every line, and parseSections uses it to choose the function which
     * handles the line: headings (SECTION), tables, bulleted (NESTEDLIST) and
     * numbered (NESTEDLIST_NR) lists, definition lists, horizontal rules (HR),
     * the three paragraph flavours, and empty lines.
     */
    private enum lineType {
        SECTION, TABLE, NESTEDLIST, NESTEDLIST_NR, DEFINITIONLIST, HR, PARAGRAPH, PARAGRAPH_INDENTED, PARAGRAPH_BOXED, EMPTYLINE
    }

    /**
     * Returns the type of a line. The decision is mainly based on the first
     * character of the line; for some types the following characters are
     * inspected as well.
     *
     * @param sm       the SpanManager holding the source
     * @param lineSpan the span of the line to classify
     * @return the type of the line
     */
    private lineType getLineType(SpanManager sm, Span lineSpan) {
        final char first = lineSpan.charAt(0, sm);

        // "{|" opens a table
        if (first == '{') {
            return lineSpan.charAt(1, sm) == '|' ? lineType.TABLE : lineType.PARAGRAPH;
        }

        // a heading starts and ends with '=' and is longer than two characters
        if (first == '=') {
            boolean heading = lineSpan.length() > 2 && sm.charAt(lineSpan.getEnd() - 1) == '=';
            return heading ? lineType.SECTION : lineType.PARAGRAPH;
        }

        // four dashes form a horizontal rule
        if (first == '-') {
            boolean rule = lineSpan.charAt(1, sm) == '-' && lineSpan.charAt(2, sm) == '-'
                    && lineSpan.charAt(3, sm) == '-';
            return rule ? lineType.HR : lineType.PARAGRAPH;
        }

        if (first == '*') {
            return lineType.NESTEDLIST;
        }
        if (first == '#') {
            return lineType.NESTEDLIST_NR;
        }
        if (first == ';') {
            return lineType.DEFINITIONLIST;
        }

        // ':' indents a paragraph or a table; a lone ':' is a plain paragraph
        if (first == ':') {
            if (lineSpan.length() <= 1) {
                return lineType.PARAGRAPH;
            }
            boolean indentedTable = lineSpan.length() > 2 && lineSpan.charAt(1, sm) == '{'
                    && lineSpan.charAt(2, sm) == '|';
            return indentedTable ? lineType.TABLE : lineType.PARAGRAPH_INDENTED;
        }

        // a leading blank: look at the first non-whitespace character
        if (first == ' ') {
            final int nonWSPos = lineSpan.nonWSCharPos(sm);
            final char firstVisible = lineSpan.charAt(nonWSPos, sm);
            if (firstVisible == Span.ERRORCHAR) {
                return lineType.EMPTYLINE;
            }
            if (firstVisible == '{' && lineSpan.charAt(nonWSPos + 1, sm) == '|') {
                return lineType.TABLE;
            }
            return lineType.PARAGRAPH_BOXED;
        }

        if (first == Span.ERRORCHAR) {
            return lineType.EMPTYLINE;
        }

        return lineType.PARAGRAPH;
    }

    /**
     * Returns the number of equality chars which are used to specify the
     * level of the section. The chars are counted in pairs, one from the
     * start and one from the end of the span. If the counting reaches the end
     * of the span (the line consists of '=' only), the level is reduced to
     * {@code (level - 1) / 2}.
     *
     * @param sm              the SpanManager holding the source
     * @param sectionNameSpan the span of the heading line
     * @return the level of the section
     */
    private int getSectionLevel(SpanManager sm, Span sectionNameSpan) {
        final int first = sectionNameSpan.getStart();
        final int limit = sectionNameSpan.getEnd();
        int level = 0;

        try {
            for (;;) {
                if (sm.charAt(first + level) != '=' || sm.charAt(limit - 1 - level) != '=') {
                    break;
                }
                level++;
            }
        } catch (StringIndexOutOfBoundsException e) {
            // running out of the source simply ends the counting
            logger.debug("EXCEPTION IS OK: " + e);
        }

        if (first + level == limit) {
            level = (level - 1) / 2;
        }

        return level;
    }

    /**
     * Takes a list of SectionContent and returns a SectionContainer with the
     * given SectionContents in the right structure.
     * <p>
     * For every SectionContent the method walks down from the root container
     * along the last sub section of each level until the level of the content
     * is reached. A SectionContent found on the way is wrapped into a new
     * SectionContainer which takes over its title. If a container on the way
     * has no sub sections, a new untitled container is created and attached
     * to it.
     *
     * @param scl the section contents in document order
     * @return the root container (level 0) holding the structure
     */
    private SectionContainer buildSectionStructure(List<SectionContent> scl) {
        SectionContainer result = new SectionContainer(0);

        for (SectionContent sContent : scl) {
            int contentLevel = sContent.getLevel();
            SectionContainer sContainer = result;

            // get the right SectionContainer or create it
            for (int containerLevel = result.getLevel() + 1; containerLevel < contentLevel; containerLevel++) {
                int containerSubSections = sContainer.nrOfSubSections();
                if (containerSubSections != 0) {
                    Section temp = sContainer.getSubSection(containerSubSections - 1);
                    if (temp.getClass() == SectionContainer.class) {
                        sContainer = (SectionContainer) temp;
                    } else {
                        // Turn the last content section into a container:
                        // the container takes over title and source span, the
                        // content moves one level down as its first child.
                        SectionContainer sct = new SectionContainer(temp.getTitleElement(), containerLevel);
                        sct.addSection(temp);
                        if (calculateSrcSpans) {
                            sct.setSrcSpan(temp.getSrcSpan());
                        }
                        temp.setTitleElement(null);
                        temp.setLevel(containerLevel + 1);
                        sContainer.removeSection(temp);
                        sContainer.addSection(sct);
                        sContainer = sct;
                    }
                } else {
                    // Attach the new container to its parent; otherwise the
                    // content added to it would not be reachable from the
                    // result and would be lost.
                    SectionContainer created = new SectionContainer(null, containerLevel);
                    sContainer.addSection(created);
                    sContainer = created;
                }
            }

            sContainer.addSection(sContent);
        }

        if (calculateSrcSpans) {
            result.setSrcSpan(new SrcSpan(0, -1));
        }

        return result;
    }

    /**
     * Tests whether {@code s1} begins with {@code s2}, ignoring case.
     *
     * @param s1 the string to inspect
     * @param s2 the expected prefix
     * @return {@code true} if the first {@code s2.length()} characters of
     *         {@code s1} equal {@code s2} ignoring case; {@code false} if they
     *         differ or {@code s1} is shorter than {@code s2}
     */
    private boolean startsWithIgnoreCase(String s1, String s2) {
        // regionMatches yields false by itself when s1 is too short
        return s1.regionMatches(true, 0, s2, 0, s2.length());
    }

    /**
     * Finds the next tag, i.e. the next '&lt;' at or behind {@code offset}
     * together with the first '&gt;' following it.
     *
     * @param sm     the SpanManager holding the source
     * @param offset the position at which the search starts
     * @return the span covering the tag including both brackets, or
     *         {@code null} if no complete tag is found
     */
    private Span getTag(SpanManager sm, int offset) {
        final int open = sm.indexOf("<", offset);
        final int close = (open == -1) ? -1 : sm.indexOf(">", open);
        if (close == -1) {
            // either no opening bracket at all or no closing bracket behind it
            return null;
        }

        final Span tag = new Span(open, close + 1);
        if (calculateSrcSpans) {
            final SrcSpan srcSpan = new SrcSpan(sm.getSrcPos(open), sm.getSrcPos(close) + 1);
            tag.setSrcSpan(srcSpan);
        }
        return tag;
    }

    /**
     * Returns the trimmed text between the brackets of a tag.
     *
     * @param sm  the SpanManager holding the source
     * @param tag the span of the tag including both brackets
     * @return the text of the tag without the brackets
     */
    private String getTagText(SpanManager sm, Span tag) {
        final int innerStart = tag.getStart() + 1;
        final int innerEnd = tag.getEnd() - 1;
        final Span inner = new Span(innerStart, innerEnd).trim(sm);
        return sm.substring(inner);
    }

    /**
     * Convenience variant of
     * {@link #parseSpecifiedTag(SpanManager, List, List, String, String)}
     * which uses an empty prefix.
     */
    private void parseSpecifiedTag(SpanManager sm, List<Span> spans, List<String> strings, String specifier) {
        parseSpecifiedTag(sm, spans, strings, specifier, "");
    }

    /**
     * Cuts every region enclosed by the given tag (for example PRE, NOWIKI or
     * MATH) out of the source. The text between the opening and the closing
     * tag is appended to {@code strings}, and the whole region is replaced by
     * the placeholder {@code prefix + "(" + specifier + ")"}, whose span
     * (without the prefix) is appended to {@code spans}. A region without a
     * closing tag extends to the last character of the source.
     *
     * @param sm        the SpanManager holding the source; modified in place
     * @param spans     receives the spans of the placeholders; NOTE(review):
     *                  the code reads the span's end right after the
     *                  replacement, so it presumably expects this list to be
     *                  managed by {@code sm} already — confirm
     * @param strings   receives the original content of the regions
     * @param specifier the tag name, matched case-insensitively as a prefix
     *                  of the tag text
     * @param prefix    text inserted in front of the placeholder
     */
    private void parseSpecifiedTag(SpanManager sm, List<Span> spans, List<String> strings, String specifier,
            String prefix) {
        int offset = 0;

        Span s;
        while ((s = getTag(sm, offset)) != null) {
            offset = s.getEnd();
            String tagText = getTagText(sm, s);
            if (startsWithIgnoreCase(tagText, specifier)) {

                // Search for the matching closing tag behind the opening tag.
                Span e;
                while ((e = getTag(sm, offset)) != null) {
                    offset = e.getEnd();
                    tagText = getTagText(sm, e);
                    if (startsWithIgnoreCase(tagText, "/" + specifier)) {
                        break;
                    }
                }

                if (e == null) {
                    /*
                     * OF: Setting e to sm.length() results in an ArrayIndexOutOfBoundsException if calculateSrcSpans=true
                     */
                    //e = new Span(sm.length(), sm.length());
                    e = new Span(Math.max(0, sm.length() - 1), Math.max(0, sm.length() - 1));
                }

                // Remember the raw content between the two tags.
                strings.add(sm.substring(s.getEnd(), e.getStart()));

                // The span to replace covers both tags and everything in between.
                Span tSpan = new Span(s.getStart(), e.getEnd());
                if (calculateSrcSpans) {
                    tSpan.setSrcSpan(new SrcSpan(sm.getSrcPos(s.getStart()), sm.getSrcPos(e.getEnd())));
                }

                spans.add(tSpan);
                sm.replace(tSpan, prefix + "(" + specifier + ")");
                // Exclude the prefix from the stored span.
                tSpan.adjustStart(prefix.length());

                // Continue the search behind the placeholder.
                offset = tSpan.getEnd();
            }
        }
    }

    /**
     * Collects the spans of all tags found in the source, in order of
     * appearance. The list is registered as a managed list of the
     * SpanManager and unregistered again if no tag was found.
     *
     * @param sm    the SpanManager holding the source
     * @param spans receives the spans of the tags
     */
    private void parseTags(SpanManager sm, List<Span> spans) {
        sm.manageList(spans);

        // each search starts where the previous tag ended
        for (Span tag = getTag(sm, 0); tag != null; tag = getTag(sm, tag.getEnd())) {
            spans.add(tag);
        }

        if (spans.isEmpty()) {
            sm.removeManagedList(spans);
        }
    }

    /**
     * Finds all templates ({{name|option|...}}) in the source, lets the
     * template parser resolve each of them and replaces the template source
     * by the pre-parse replacement of the resolved template.
     * <p>
     * The span list is registered as a managed list of the SpanManager and
     * unregistered again if no template was found.
     *
     * @param sm                    the SpanManager holding the source; modified in place
     * @param resolvedTemplateSpans receives the span of every resolved template
     * @param resolvedTemplates     receives the resolved templates, index-aligned with the spans
     * @param pp                    the page being built, handed to the template parser
     */
    private void parseTemplates(SpanManager sm, List<Span> resolvedTemplateSpans,
            List<ResolvedTemplate> resolvedTemplates, ParsedPage pp) {

        sm.manageList(resolvedTemplateSpans);

        // First pass: collect the positions of all "{{" in source order. The
        // search starts at 0 (pos + 2 with pos == -2) and continues two
        // characters behind each match.
        int pos = -2;
        Stack<Integer> templateOpenTags = new Stack<Integer>();
        while ((pos = sm.indexOf("{{", pos + 2)) != -1) {
            // Exactly three opening braces: skip the first one, so that the
            // last two braces are taken as the opening tag.
            if (sm.length() > pos + 3 && sm.charAt(pos + 2) == '{' && sm.charAt(pos + 3) != '{') {
                pos++;
            }
            templateOpenTags.push(pos);
        }

        // Second pass: the stack yields the opening tags from the last one to
        // the first one, so that templates nested in other templates are
        // replaced before the templates enclosing them.
        while (!templateOpenTags.empty()) {
            int templateOpenTag = templateOpenTags.pop();
            int templateCloseTag = sm.indexOf("}}", templateOpenTag);
            if (templateCloseTag == -1) {
                // unclosed template: leave the source untouched
                continue;
            }

            // The name ends at the first '|' (if any); everything behind it
            // is split into options at every '|'.
            int templateOptionTag = sm.indexOf("|", templateOpenTag, templateCloseTag);
            int templateNameEnd;
            List<String> templateOptions;

            if (templateOptionTag != -1) {
                templateNameEnd = templateOptionTag;
                templateOptions = tokenize(sm, templateOptionTag + 1, templateCloseTag, "|");
            } else {
                templateNameEnd = templateCloseTag;
                templateOptions = new ArrayList<String>();
            }

            // The span covers the template including both brace pairs.
            Span ts = new Span(templateOpenTag, templateCloseTag + 2);

            Template t = new Template(ts,
                    encodeWikistyle(sm.substring(templateOpenTag + 2, templateNameEnd).trim()), templateOptions);

            if (calculateSrcSpans) {
                t.setSrcSpan(new SrcSpan(sm.getSrcPos(templateOpenTag), sm.getSrcPos(templateCloseTag + 2)));
            }

            t.setPos(ts);

            ResolvedTemplate rt = templateParser.parseTemplate(t, pp);

            resolvedTemplateSpans.add(ts);
            resolvedTemplates.add(rt);

            sm.replace(ts, rt.getPreParseReplacement());
        }

        if (resolvedTemplateSpans.isEmpty()) {
            sm.removeManagedList(resolvedTemplateSpans);
        }
    }

    /**
     * Replaces every &lt;gallery&gt;...&lt;/gallery&gt; region by a list of
     * ordinary image links, one "[[...]]" per line of the gallery. An option
     * of the opening tag (the text behind its '=') is emitted as a caption
     * line in front of the images. The two gallery tags are removed from
     * {@code tagSpans}.
     * <p>
     * Quick hack, not very efficient, should be improved; won't work with
     * calculateSrcSpans == true!
     *
     * @param sm       the SpanManager holding the source; modified in place
     * @param tagSpans the spans of all tags in the source
     */
    private void convertGalleriesToImages(SpanManager sm, List<Span> tagSpans) {
        int idx = 0;
        while (idx < tagSpans.size() - 1) {
            final String openText = getTagText(sm, tagSpans.get(idx));

            // only an opening gallery tag directly followed by its closing tag is converted
            if (!startsWithIgnoreCase(openText, "GALLERY")
                    || !startsWithIgnoreCase(getTagText(sm, tagSpans.get(idx + 1)), "/GALLERY")) {
                idx++;
                continue;
            }

            // gallery range is openTag.end() .. closeTag.start(); both tags
            // leave the list, so idx already points to the next candidate
            final Span openTag = tagSpans.remove(idx);
            final Span closeTag = tagSpans.remove(idx);

            final StringBuilder replacement = new StringBuilder();

            // caption (any option will be treated as caption)
            final int eqPos = openText.indexOf('=');
            if (eqPos != -1) {
                int captionStart = eqPos + 1;
                int captionEnd = openText.length();

                // strip enclosing double quotes
                final boolean quoted = captionStart < captionEnd && openText.charAt(captionStart) == '"'
                        && openText.charAt(captionEnd - 1) == '"';
                if (quoted) {
                    captionStart++;
                    captionEnd--;
                }

                if (captionStart < captionEnd) {
                    replacement.append(openText.substring(captionStart, captionEnd)).append(lineSeparator);
                }
            }

            // images
            for (String image : tokenize(sm, openTag.getEnd(), closeTag.getStart(), lineSeparator)) {
                replacement.append("[[").append(image).append("]]").append(lineSeparator);
            }

            // replace the source and remove the tags
            sm.replace(openTag.getStart(), closeTag.getEnd(), replacement.toString());
        }
    }

    /**
     * Builds a Table from the given lines. The first line (the one opening
     * the table) is dropped; the following lines are consumed until the line
     * closing the table ("|}") or the end of the list is reached.
     * <p>
     * The lines belonging to one cell are collected in a temporary span list
     * and parsed with parseSections as soon as the next cell, row or the end
     * of the table starts. Lines of nested tables are not interpreted here
     * but stay part of the cell containing them.
     *
     * @param sm        the SpanManager holding the source
     * @param cepp      the parameters needed to parse content elements
     * @param lineSpans the remaining lines; the first one opens the table
     * @return the table built from the consumed lines
     */
    private Table buildTable(SpanManager sm, ContentElementParsingParameters cepp, LinkedList<Span> lineSpans) {

        Table result = new Table();
        // col is -1 until the first cell of a row is started
        int col = -1;
        int row = 0;
        // nesting depth of tables inside the current cell
        int subTables = 0;
        // lines of the cell which is currently collected
        LinkedList<Span> tableDataSpans = new LinkedList<Span>();
        sm.manageList(tableDataSpans);

        if (calculateSrcSpans) {
            result.setSrcSpan(new SrcSpan(sm.getSrcPos(lineSpans.getFirst().getStart()), -1));
        }

        // drop the line opening the table
        lineSpans.removeFirst();

        while (!lineSpans.isEmpty()) {
            Span s = lineSpans.removeFirst();

            // the first two non-whitespace characters decide the role of the line
            int pos = s.nonWSCharPos(sm);
            char c0 = s.charAt(pos, sm);
            char c1 = s.charAt(pos + 1, sm);

            if (subTables == 0 && (c0 == '!' || c0 == '|')) {
                // A table control line outside of nested tables: finish the
                // cell collected so far.
                if (!tableDataSpans.isEmpty()) {
                    // The line is put back in front of the list while the
                    // cell is parsed and removed again afterwards.
                    lineSpans.addFirst(s);

                    SrcSpan ei = null;
                    if (calculateSrcSpans) {
                        ei = new SrcSpan(sm.getSrcPos(tableDataSpans.getFirst().getStart() - 1) + 1, -1);
                    }

                    TableElement te = new TableElement(parseSections(sm, cepp, tableDataSpans), row, col);
                    te.setSrcSpan(ei);
                    result.addTableElement(te);
                    lineSpans.removeFirst();
                }

                col++;
                if (c1 == '-') {
                    // "|-" (or "!-"): a new row starts
                    row++;
                    col = -1;
                    continue;
                } else if (c0 == '|' && c1 == '}') {
                    // "|}": the table ends here
                    sm.removeManagedList(tableDataSpans);

                    if (calculateSrcSpans) {
                        result.getSrcSpan().setEnd(sm.getSrcPos(s.getEnd()));
                    }

                    return result;
                } else if (c0 == '|' && c1 == '+') {
                    // "|+": the rest of the line is the title of the table
                    result.setTitleElement(
                            parseContentElement(sm, cepp, new Span(s.getStart() + pos + 2, s.getEnd()).trim(sm)));
                    continue;
                } else {
                    // A cell line. "||" separates several cells on one line:
                    // the remainder (starting at the second '|') is put back
                    // as a line of its own.
                    int multipleCols;
                    if ((multipleCols = sm.indexOf("||", s.getStart() + pos + 1, s.getEnd())) != -1) {
                        lineSpans.addFirst(new Span(multipleCols + 1, s.getEnd()));
                        s.setEnd(multipleCols);
                    }

                    // A further '|' is treated as the end of the cell
                    // options; the content starts behind it.
                    int optionTagPos = sm.indexOf("|", s.getStart() + pos + 1, s.getEnd());

                    if (optionTagPos != -1) {
                        s.setStart(optionTagPos + 1).trim(sm);
                    } else {
                        s.adjustStart(pos + 1).trim(sm);
                    }
                }
            } else if (c0 == '|' && c1 == '}') {
                // end of a nested table
                subTables--;
            } else if (c0 == '{' && c1 == '|') {
                // start of a nested table
                subTables++;
            }

            // the line (or what is left of it) belongs to the current cell
            tableDataSpans.addLast(s);
        }

        // The lines ran out before the table was closed: add the cell
        // collected so far.
        if (tableDataSpans.size() != 0) {

            SrcSpan ei = null;
            if (calculateSrcSpans) {
                ei = new SrcSpan(sm.getSrcPos(tableDataSpans.getFirst().getStart() - 1) + 1, -1);
            }

            TableElement te = new TableElement(parseSections(sm, cepp, tableDataSpans), row, col);
            te.setSrcSpan(ei);

            result.addTableElement(te);
        }

        sm.removeManagedList(tableDataSpans);

        if (calculateSrcSpans) {
            // no closing line was found, so the end is set to -1
            result.getSrcSpan().setEnd(-1);
        }

        return result;
    }

    /**
     * Builds a list container from the leading list lines of {@code lineSpans}.
     * <p>
     * Every leading line whose line type equals {@code listType} is removed from
     * {@code lineSpans}; its first character is skipped and the remainder is
     * trimmed to form one list item. An item that is itself a list line is turned
     * into a nested container by a recursive call, every other item is parsed
     * into a {@link NestedListElement}.
     *
     * @param sm
     *            the text being parsed
     * @param cepp
     *            the shared parsing parameters, handed on to the element parser
     * @param lineSpans
     *            the remaining lines; the lines belonging to this list are
     *            consumed from its head
     * @param listType
     *            the line type of this list; {@code NESTEDLIST_NR} produces a
     *            numbered container
     * @return the container holding all items of this list
     */
    private NestedListContainer buildNestedList(SpanManager sm, ContentElementParsingParameters cepp,
            LinkedList<Span> lineSpans, lineType listType) {

        final NestedListContainer container = new NestedListContainer(listType == lineType.NESTEDLIST_NR);

        // The end is not known yet, it is filled in once the items are collected.
        if (calculateSrcSpans) {
            container.setSrcSpan(new SrcSpan(sm.getSrcPos(lineSpans.getFirst().getStart()), -1));
        }

        // Collect the lines of this list, each one without its leading list character.
        final LinkedList<Span> items = new LinkedList<Span>();
        while (!lineSpans.isEmpty() && getLineType(sm, lineSpans.getFirst()) == listType) {
            final Span line = lineSpans.removeFirst();
            items.add(new Span(line.getStart() + 1, line.getEnd()).trim(sm));
        }
        sm.manageList(items);

        if (calculateSrcSpans) {
            container.getSrcSpan().setEnd(sm.getSrcPos(items.getLast().getEnd()));
        }

        while (!items.isEmpty()) {
            final Span head = items.getFirst();
            final lineType headType = getLineType(sm, head);
            final boolean opensSubList = headType == lineType.NESTEDLIST || headType == lineType.NESTEDLIST_NR;

            if (opensSubList) {
                // The recursive call consumes the sub list's lines from 'items'.
                container.add(buildNestedList(sm, cepp, items, headType));
                continue;
            }

            items.removeFirst();
            container.add((NestedListElement) parseContentElement(sm, cepp, head, new NestedListElement()));
        }

        sm.removeManagedList(items);

        return container;
    }

    /**
     * Builds a definition list from the head of {@code lineSpans}.
     * <p>
     * The first line is always consumed and its first character is skipped. If
     * the line contains a ':' it is split there into two elements, otherwise it
     * yields a single element. All directly following lines that start with ':'
     * are consumed as well and contribute one element each.
     *
     * @param sm
     *            the text being parsed
     * @param cepp
     *            the shared parsing parameters, handed on to the element parser
     * @param lineSpans
     *            the remaining lines; the lines of this list are consumed from
     *            its head
     * @return the definition list built from the consumed lines
     */
    private DefinitionList buildDefinitionList(SpanManager sm, ContentElementParsingParameters cepp,
            LinkedList<Span> lineSpans) {
        final List<ContentElement> parts = new ArrayList<ContentElement>();

        final Span head = lineSpans.removeFirst();
        final int colon = sm.indexOf(":", head);

        if (colon != -1) {
            // The text behind the ':' is parsed before the text in front of it, which is
            // then placed first (presumably so that edits made while parsing the later
            // text cannot shift the positions of the earlier text).
            parts.add(parseContentElement(sm, cepp, new Span(colon + 1, head.getEnd())));
            parts.add(0, parseContentElement(sm, cepp, new Span(head.getStart() + 1, colon)));
        } else {
            parts.add(parseContentElement(sm, cepp, new Span(head.getStart() + 1, head.getEnd())));
        }

        // Continuation lines: every following line that begins with ':'.
        while (!lineSpans.isEmpty() && sm.charAt(lineSpans.getFirst().getStart()) == ':') {
            final Span next = lineSpans.removeFirst();
            parts.add(parseContentElement(sm, cepp, new Span(next.getStart() + 1, next.getEnd())));
        }

        final DefinitionList list = new DefinitionList(parts);

        if (calculateSrcSpans) {
            // From the start of the first line to the end of the last element.
            final ContentElement last = parts.get(parts.size() - 1);
            list.setSrcSpan(new SrcSpan(sm.getSrcPos(head.getStart()), last.getSrcSpan().getEnd()));
        }

        return list;
    }

    /**
     * Builds a paragraph of the given type from the head of {@code lineSpans}.
     * <p>
     * The first line is always consumed. A normal paragraph additionally takes
     * all directly following lines of the same type, a boxed paragraph takes all
     * following boxed or empty lines, and an indented paragraph consists of the
     * first line only, whose first character is overwritten with a blank before
     * the line is trimmed.
     *
     * @param sm
     *            the text being parsed
     * @param cepp
     *            the shared parsing parameters, handed on to the element parser
     * @param lineSpans
     *            the remaining lines; the lines of this paragraph are consumed
     *            from its head
     * @param paragraphType
     *            the line type of the first line
     * @return the parsed paragraph, or {@code null} if {@code paragraphType} is
     *         not one of the paragraph line types (the first line has been
     *         consumed in that case, too)
     */
    private Paragraph buildParagraph(SpanManager sm, ContentElementParsingParameters cepp,
            LinkedList<Span> lineSpans, lineType paragraphType) {

        final Paragraph paragraph = new Paragraph();
        final LinkedList<Span> body = new LinkedList<Span>();
        final Span first = lineSpans.removeFirst();
        body.add(first);

        switch (paragraphType) {
        case PARAGRAPH:
            paragraph.setType(Paragraph.type.NORMAL);
            while (!lineSpans.isEmpty() && getLineType(sm, lineSpans.getFirst()) == paragraphType) {
                body.add(lineSpans.removeFirst());
            }
            break;

        case PARAGRAPH_BOXED:
            paragraph.setType(Paragraph.type.BOXED);
            while (!lineSpans.isEmpty()) {
                final lineType next = getLineType(sm, lineSpans.getFirst());
                // Empty lines do not end a boxed paragraph.
                final boolean continues = next == paragraphType || next == lineType.EMPTYLINE;
                if (!continues) {
                    break;
                }
                body.add(lineSpans.removeFirst());
            }
            break;

        case PARAGRAPH_INDENTED:
            paragraph.setType(Paragraph.type.INDENTED);
            // Blank out the leading character of the line, then trim the line.
            first.trim(sm.setCharAt(first.getStart(), ' '));
            break;

        default:
            return null;
        }

        parseContentElement(sm, cepp, body, paragraph);

        return paragraph;
    }

    /**
     * Splits the range {@code [start, end)} of the text at every occurrence of
     * {@code delim} and returns the trimmed, non-empty pieces in order.
     *
     * @param sm
     *            the text to read from
     * @param start
     *            the first position of the range
     * @param end
     *            the position behind the range
     * @param delim
     *            the delimiter separating the tokens
     * @return the tokens; empty if {@code start > end} or if no piece has any
     *         content after trimming
     */
    private List<String> tokenize(SpanManager sm, int start, int end, String delim) {
        final List<String> tokens = new ArrayList<String>();

        if (start > end) {
            logger.debug("tokenize(" + start + ", " + end + ") doesn't make sense");
            return tokens;
        }

        int from = start;
        while (true) {
            final int hit = sm.indexOf(delim, from, end);
            // Without a further delimiter the last piece runs up to the end of the range.
            final int to = (hit == -1) ? end : hit;

            final String piece = sm.substring(from, to).trim();
            if (piece.length() > 0) {
                tokens.add(piece);
            }

            if (hit == -1) {
                break;
            }
            from = hit + delim.length();
        }

        return tokens;
    }

    /**
     * Finds the external links for one protocol inside the span {@code s}.
     * <p>
     * A link is recognised where {@code protocol} occurs at the very start of
     * the span or directly behind a blank or a '['. The link target runs up to
     * the next blank, ']' or line separator character, or to the end of the
     * text. If the target is preceded by '[' (optionally with blanks in between)
     * and a closing ']' is found inside the span, the text between target and
     * ']' is the link text and the surrounding markup is deleted from the
     * {@link SpanManager}; an empty link text is replaced by {@code "[ ]"}.
     * Otherwise the target itself remains in the text as the link text.
     * <p>
     * For every link a {@link Span} is appended to {@code managedList} and a
     * {@link Link} of type {@code EXTERNAL} is appended to {@code links}.
     *
     * @param sm
     *            the text being parsed; it is modified
     * @param s
     *            the range to search, normally one line
     * @param protocol
     *            the protocol prefix to look for, e.g. {@code "http://"}
     * @param managedList
     *            receives the span of every link; the caller registers this list
     *            with {@code sm} before calling, so that later edits keep the
     *            spans valid
     * @param links
     *            receives the links that were found
     * @param home_cc
     *            passed to the constructor of every created link (its home
     *            element, judging by the name)
     */
    private void parseExternalLinks(SpanManager sm, Span s, String protocol, List<Span> managedList,
            List<Link> links, Content home_cc) {
        // Characters that terminate a link target.
        final String targetTerminators = lineSeparator + " ]";

        int extLinkTargetStart;
        // Only the end of this span is used: it marks where the next search starts.
        Span extLinkSpan = new Span(0, s.getStart());

        while ((extLinkTargetStart = sm.indexOf(protocol, extLinkSpan.getEnd(), s.getEnd())) != -1) {

            // Is the character in front of the protocol identifier an allowed one?
            if (extLinkTargetStart > s.getStart() && (" [").indexOf(sm.charAt(extLinkTargetStart - 1)) == -1) {
                // No link here, continue the search one character further on.
                extLinkSpan = new Span(0, extLinkTargetStart + 1);
                continue;
            }

            // Target: stop at a terminator, but never read past the end of the
            // text when the target is the very last thing in it.
            int extLinkTargetEnd = extLinkTargetStart;
            while (extLinkTargetEnd < sm.length()
                    && targetTerminators.indexOf(sm.charAt(extLinkTargetEnd)) == -1) {
                extLinkTargetEnd++;
            }

            // Open/Close Tags
            int extLinkOpenTag = extLinkTargetStart - 1;
            int extLinkCloseTag;
            int extLinkTextStart = extLinkTargetStart;
            int extLinkTextEnd = extLinkTargetEnd;

            // Skip blanks between a possible '[' and the target.
            while (extLinkOpenTag >= s.getStart() && sm.charAt(extLinkOpenTag) == ' ') {
                extLinkOpenTag--;
            }

            if (extLinkOpenTag >= s.getStart() && sm.charAt(extLinkOpenTag) == '[') {
                extLinkCloseTag = sm.indexOf("]", extLinkTargetEnd, s.getEnd());

                if (extLinkCloseTag != -1) {
                    extLinkTextStart = extLinkTargetEnd;
                    // Unlike "normal" links, target and text are not separated
                    // by '|' but by blanks.
                    while (sm.charAt(extLinkTextStart) == ' ') {
                        extLinkTextStart++;
                    }
                    extLinkTextEnd = extLinkCloseTag;
                    extLinkCloseTag++;

                    if (extLinkTextStart == extLinkTextEnd) {
                        // No link text given: insert a placeholder text.
                        sm.insert(extLinkTextStart, "[ ]");
                        extLinkTextEnd += 3;
                        extLinkCloseTag += 3;
                    }
                } else {
                    // '[' without a matching ']': treat it as a bare URL.
                    extLinkOpenTag = extLinkTargetStart;
                    extLinkCloseTag = extLinkTargetEnd;
                }
            } else {
                // Bare URL without brackets.
                extLinkOpenTag = extLinkTargetStart;
                extLinkCloseTag = extLinkTargetEnd;
            }

            extLinkSpan = new Span(extLinkOpenTag, extLinkCloseTag);
            managedList.add(extLinkSpan);

            Link l = new Link(home_cc, extLinkSpan, sm.substring(extLinkTargetStart, extLinkTargetEnd),
                    Link.type.EXTERNAL, null);
            links.add(l);

            if (calculateSrcSpans) {
                l.setSrcSpan(new SrcSpan(sm.getSrcPos(extLinkOpenTag), sm.getSrcPos(extLinkCloseTag - 1) + 1));
            }

            // Remove the markup behind the link text first, so that the
            // positions in front of it are still valid for the second delete.
            sm.delete(extLinkTextEnd, extLinkCloseTag);
            sm.delete(extLinkOpenTag, extLinkTextStart);
        }
    }

    /**
     * Returns the namespace prefix of a link target in lower case.
     * <p>
     * The namespace is everything in front of the first ':' with underscores
     * replaced by blanks and surrounding whitespace removed. The conversion to
     * lower case is locale independent, so that the result does not depend on
     * the default locale of the JVM (e.g. "IMAGE" always becomes "image", also
     * under a Turkish locale).
     *
     * @param target
     *            the link target, must not be {@code null}
     * @return the lower-cased namespace, or {@code null} if the target contains
     *         no ':'
     */
    private static String getLinkNameSpace(String target) {
        final int colon = target.indexOf(':');
        if (colon == -1) {
            return null;
        }
        return target.substring(0, colon).replace('_', ' ').trim().toLowerCase(java.util.Locale.ROOT);
    }

    /**
     * Parses images and internal links. There is not much difference between
     * the two, so they are handled in a single step.
     * <p>
     * All {@code "[["} positions are collected first and then processed from the
     * last one to the first one. For every opening tag with a matching
     * {@code "]]"} whose target contains no line separator:
     * <ul>
     * <li>a target with a namespace listed in the image identifiers becomes an
     * {@code IMAGE} link; its text starts behind the last '|' and everything
     * between the first '|' and the closing tag is tokenized into the link
     * parameters,</li>
     * <li>a target with any other namespace becomes an {@code UNKNOWN} link,</li>
     * <li>a target without namespace becomes an {@code INTERNAL} link, unless
     * the previously accepted link was internal as well and its span hits the
     * range of this link.</li>
     * </ul>
     * The markup around the link text is deleted from the {@link SpanManager}
     * and line separators inside the link text are replaced by a blank.
     *
     * @param sm
     *            the text being parsed; it is modified
     * @param linkSpans
     *            receives the text span of every link; the list is registered
     *            with {@code sm} here and stays registered
     * @param links
     *            receives the links, parallel to {@code linkSpans}; their home
     *            element is not set yet
     */
    private void parseImagesAndInternalLinks(SpanManager sm, List<Span> linkSpans, List<Link> links) {

        sm.manageList(linkSpans);

        // Collect all opening tags; popping yields them from last to first.
        int pos = -1;
        Stack<Integer> linkOpenTags = new Stack<Integer>();
        while ((pos = sm.indexOf("[[", pos + 1)) != -1) {
            linkOpenTags.push(pos);
        }

        // Starts out behind the text, so that it cannot hit any real link.
        Span lastLinkSpan = new Span(sm.length() + 1, sm.length() + 1);
        // Deliberately declared outside the loop: it still holds the type of the
        // previously accepted link when the next opening tag is examined.
        Link.type linkType = Link.type.INTERNAL;

        while (!linkOpenTags.empty()) {
            int linkStartTag = linkOpenTags.pop();
            int linkEndTag = sm.indexOf("]]", linkStartTag);
            if (linkEndTag == -1) {
                continue;
            }

            int linkOptionTag = sm.indexOf("|", linkStartTag, linkEndTag);

            int linkTextStart;
            String linkTarget;

            if (linkOptionTag != -1) {
                linkTextStart = linkOptionTag + 1;
                linkTarget = sm.substring(new Span(linkStartTag + 2, linkOptionTag).trim(sm));
            } else {
                linkTextStart = linkStartTag + 2;
                linkTarget = sm.substring(new Span(linkStartTag + 2, linkEndTag).trim(sm));
            }

            // Is it a regular link? A target spanning several lines is not.
            if (linkTarget.indexOf(lineSeparator) != -1) {
                continue;
            }
            linkTarget = encodeWikistyle(linkTarget);

            // So it is a link or an image.
            List<String> parameters;

            String namespace = getLinkNameSpace(linkTarget);
            if (namespace != null) {
                if (imageIdentifers.contains(namespace)) {
                    if (linkOptionTag != -1) {
                        // The image text is the part behind the last '|'.
                        int temp;
                        while ((temp = sm.indexOf("|", linkTextStart, linkEndTag)) != -1) {
                            linkTextStart = temp + 1;
                        }

                        parameters = tokenize(sm, linkOptionTag + 1, linkEndTag, "|");

                        // Maybe there is an external link at the end of the
                        // image description, so that "]]" is followed by a third
                        // ']'. The length check keeps the look-ahead inside the
                        // text when the image link ends it.
                        if (linkEndTag + 2 < sm.length() && sm.charAt(linkEndTag + 2) == ']'
                                && sm.indexOf("[", linkTextStart, linkEndTag) != -1) {
                            linkEndTag++;
                        }
                    } else {
                        parameters = null;
                    }
                    linkType = Link.type.IMAGE;
                } else {
                    // Link has a namespace but is not an image.
                    linkType = Link.type.UNKNOWN;
                    parameters = null;
                }
            } else {
                if (linkType == Link.type.INTERNAL && lastLinkSpan.hits(new Span(linkStartTag, linkEndTag + 2))) {
                    continue;
                }
                parameters = null;
                linkType = Link.type.INTERNAL;
            }

            Span posSpan = new Span(linkTextStart, linkEndTag).trim(sm);
            linkSpans.add(posSpan);

            Link l = new Link(null, posSpan, linkTarget, linkType, parameters);
            links.add(l);

            if (calculateSrcSpans) {
                l.setSrcSpan(new SrcSpan(sm.getSrcPos(linkStartTag), sm.getSrcPos(linkEndTag + 2)));
            }

            // Delete the markup behind the link text first, so that the
            // positions in front of it are still valid for the second delete.
            sm.delete(posSpan.getEnd(), linkEndTag + 2);
            sm.delete(linkStartTag, posSpan.getStart());

            // Remove line separators inside the link text.
            int lsinlink;
            while ((lsinlink = sm.indexOf(lineSeparator, posSpan)) != -1) {
                sm.replace(lsinlink, lsinlink + lineSeparator.length(), " ");
            }

            lastLinkSpan = posSpan;
        }
    }

    /**
     * Searches the range given by the span {@code s} for pairs of the tag
     * {@code quotation}. For every pair a span covering the quoted text is
     * appended to {@code quotedSpans} and both tags are deleted from the text.
     * The search stops as soon as an opening tag has no closing tag.
     *
     * @param sm
     *            the source in which is searched
     * @param s
     *            the range in which is searched
     * @param quotedSpans
     *            the list the found spans are appended to; it should be managed
     *            by the SpanManager {@code sm}
     * @param quotation
     *            the tag that opens and closes a quoted span
     */
    private void parseQuotedSpans(SpanManager sm, Span s, List<Span> quotedSpans, String quotation) {

        final int tagLength = quotation.length();

        int open = sm.indexOf(quotation, s.getStart(), s.getEnd());
        while (open != -1) {

            final int close = sm.indexOf(quotation, open + tagLength, s.getEnd());
            if (close == -1) {
                // An unmatched opening tag is left in the text.
                return;
            }

            // The span runs from the opening tag up to the closing tag.
            final Span quoted = new Span(open, close);
            quotedSpans.add(quoted);

            // The source span includes both tags.
            if (calculateSrcSpans) {
                quoted.setSrcSpan(new SrcSpan(sm.getSrcPos(open), sm.getSrcPos(close + tagLength - 1) + 1));
            }

            // Closing tag first, so that the position of the opening tag stays valid.
            sm.delete(close, close + tagLength);
            sm.delete(open, open + tagLength);

            // Continue behind the span that was just found.
            open = sm.indexOf(quotation, quoted.getEnd(), s.getEnd());
        }
    }

    /**
     * Searches one line for bold and italic quotations. This has to be done
     * line by line, because a line separator always closes open bold and italic
     * tags.
     *
     * @param sm
     *            the text being parsed; the quotation tags are deleted from it
     * @param line
     *            the line to search
     * @param boldSpans
     *            receives the bold spans
     * @param italicSpans
     *            receives the italic spans
     */
    private void parseBoldAndItalicSpans(SpanManager sm, Span line, List<Span> boldSpans, List<Span> italicSpans) {
        // Pairs first: bold (three apostrophes) before italic (two apostrophes).
        parseQuotedSpans(sm, line, boldSpans, "'''");
        parseQuotedSpans(sm, line, italicSpans, "''");

        // Maybe one single open tag is left over; it then applies up to the end of the line.
        final int openTag = sm.indexOf("''", line);
        if (openTag == -1) {
            return;
        }

        final Span rest = new Span(openTag, line.getEnd());
        if (calculateSrcSpans) {
            rest.setSrcSpan(new SrcSpan(sm.getSrcPos(openTag), sm.getSrcPos(line.getEnd())));
        }

        // Three apostrophes at the tag position mean bold, otherwise it is italic.
        final boolean bold = sm.indexOf("'''", openTag, openTag + 3) != -1;
        (bold ? boldSpans : italicSpans).add(rest);
        sm.delete(openTag, openTag + (bold ? 3 : 2));
    }

    /**
     * Converts a link target to its wiki style form by replacing every blank
     * with an underscore.
     *
     * @param str
     *            the text to encode, must not be {@code null}
     * @return the text with all blanks replaced by underscores
     */
    private static String encodeWikistyle(String str) {
        final String encoded = str.replace(" ", "_");
        return encoded;
    }

    /**
     * Builds a ContentElement from a String.
     * <p>
     * Images and internal links are parsed first, then the text is split into
     * lines and handed to the general element parser.
     *
     * @param src
     *            the MediaWiki source of the element
     * @return the parsed element
     */
    @Override
    public ContentElement parseContentElement(String src) {
        final SpanManager text = new SpanManager(src);
        final ContentElementParsingParameters params = new ContentElementParsingParameters();

        parseImagesAndInternalLinks(text, params.linkSpans, params.links);

        final LinkedList<Span> lines = new LinkedList<Span>();
        getLineSpans(text, lines);
        // The general parser registers the line list with the SpanManager itself.
        text.removeManagedList(lines);

        final ContentElement parsed = parseContentElement(text, params, lines, new ContentElement());
        return parsed;
    }

    /**
     * Builds a new ContentElement from a single line.
     *
     * @param sm
     *            the text being parsed
     * @param cepp
     *            the shared parsing parameters
     * @param lineSpan
     *            the line to parse
     * @return the parsed element
     */
    private ContentElement parseContentElement(SpanManager sm, ContentElementParsingParameters cepp,
            Span lineSpan) {
        // Same as the single line variant with a given result, using a fresh element.
        return parseContentElement(sm, cepp, lineSpan, new ContentElement());
    }

    /**
     * Builds a ContentElement from a single line. The result object is given by
     * the caller, so that e.g. a NestedListElement can be filled with the
     * parsed information.
     *
     * @param sm
     *            the text being parsed
     * @param cepp
     *            the shared parsing parameters
     * @param lineSpan
     *            the line to parse
     * @param result
     *            the element to fill
     * @return {@code result}, as returned by the general element parser
     */
    private ContentElement parseContentElement(SpanManager sm, ContentElementParsingParameters cepp, Span lineSpan,
            ContentElement result) {
        final LinkedList<Span> singleLine = new LinkedList<Span>();
        singleLine.addLast(lineSpan);
        return parseContentElement(sm, cepp, singleLine, result);
    }

    /**
     * Builds a ContentElement from a list of lines. This function is called by
     * all the other parseContentElement(..) functions.
     * <p>
     * The element covers the range from the start of the first line to the end
     * of the last line, trimmed. Bold/italic markup and external links are
     * parsed line by line; afterwards all links, templates, tags, noWiki and
     * math spans in {@code cepp} that hit the element's range are taken out of
     * {@code cepp} and attached to the element. Finally the text is read from
     * the {@link SpanManager} and all collected spans are shifted so that they
     * are relative to the start of the element's text.
     *
     * @param sm
     *            the text being parsed; it is modified
     * @param cepp
     *            the shared parsing parameters; entries that belong to this
     *            element are removed from its lists
     * @param lineSpans
     *            the lines of the element; the list is empty when the method
     *            returns
     * @param result
     *            the element to fill
     * @return {@code result}
     */
    private ContentElement parseContentElement(SpanManager sm, ContentElementParsingParameters cepp,
            LinkedList<Span> lineSpans, ContentElement result) {

        List<Link> localLinks = new ArrayList<Link>();
        List<Template> localTemplates = new ArrayList<Template>();

        List<Span> boldSpans = new ArrayList<Span>();
        List<Span> italicSpans = new ArrayList<Span>();
        sm.manageList(boldSpans);
        sm.manageList(italicSpans);

        List<Span> managedSpans = new ArrayList<Span>();
        sm.manageList(managedSpans);

        // The range of the whole element; it is managed, too, so that it follows
        // every change made to the text below.
        Span contentElementRange = new Span(lineSpans.getFirst().getStart(), lineSpans.getLast().getEnd()).trim(sm);
        managedSpans.add(contentElementRange);

        // set the SrcSpan
        if (calculateSrcSpans) {
            result.setSrcSpan(new SrcSpan(sm.getSrcPos(contentElementRange.getStart()),
                    sm.getSrcPos(contentElementRange.getEnd())));
        }

        sm.manageList(lineSpans);
        while (!lineSpans.isEmpty()) {
            Span line = lineSpans.getFirst();

            parseBoldAndItalicSpans(sm, line, boldSpans, italicSpans);

            // External links
            parseExternalLinks(sm, line, "http://", managedSpans, localLinks, result);
            parseExternalLinks(sm, line, "https://", managedSpans, localLinks, result);
            parseExternalLinks(sm, line, "ftp://", managedSpans, localLinks, result);
            parseExternalLinks(sm, line, "mailto:", managedSpans, localLinks, result);

            // end of the line by line operations
            lineSpans.removeFirst();
        }
        sm.removeManagedList(lineSpans);

        // Links: cepp.linkSpans and cepp.links are parallel lists.
        int i;
        i = 0;
        while (i < cepp.linkSpans.size()) {
            if (contentElementRange.hits(cepp.linkSpans.get(i))) {
                Span linkSpan = cepp.linkSpans.remove(i);
                managedSpans.add(linkSpan);
                Link l = cepp.links.remove(i).setHomeElement(result);
                localLinks.add(l);
                if (!showImageText && l.getType() == Link.type.IMAGE) {
                    // deletes the Image Text from the ContentElement Text.
                    sm.delete(linkSpan);
                }
            } else {
                i++;
            }
        }

        // Templates: cepp.templateSpans and cepp.templates are parallel lists.
        i = 0;
        while (i < cepp.templateSpans.size()) {
            Span ts = cepp.templateSpans.get(i);
            if (contentElementRange.hits(ts)) {
                ResolvedTemplate rt = cepp.templates.remove(i);

                if (rt.getPostParseReplacement() != null) {
                    sm.replace(ts, rt.getPostParseReplacement());
                }
                cepp.templateSpans.remove(i);

                Object parsedObject = rt.getParsedObject();
                if (parsedObject != null) {
                    managedSpans.add(ts);

                    // Exact class comparison: subclasses of Template or Link
                    // fall through to the last branch.
                    Class<?> parsedObjectClass = parsedObject.getClass();
                    if (parsedObjectClass == Template.class) {
                        localTemplates.add((Template) parsedObject);
                    } else if (parsedObjectClass == Link.class) {
                        localLinks.add(((Link) parsedObject).setHomeElement(result));
                    } else {
                        localTemplates.add(rt.getTemplate());
                    }
                }
            } else {
                i++;
            }
        }

        // HTML/XML Tags
        i = 0;
        List<Span> tags = new ArrayList<Span>();
        while (i < cepp.tagSpans.size()) {
            Span s = cepp.tagSpans.get(i);
            if (contentElementRange.hits(s)) {
                cepp.tagSpans.remove(i);
                if (deleteTags) {
                    sm.delete(s);
                } else {
                    tags.add(s);
                    managedSpans.add(s);
                }
            } else {
                i++;
            }
        }

        // noWiki: the span is replaced by the stored original string.
        i = 0;
        List<Span> localNoWikiSpans = new ArrayList<Span>();
        while (i < cepp.noWikiSpans.size()) {
            Span s = cepp.noWikiSpans.get(i);
            if (contentElementRange.hits(s)) {
                cepp.noWikiSpans.remove(i);
                sm.replace(s, cepp.noWikiStrings.remove(i));
                localNoWikiSpans.add(s);
                managedSpans.add(s);
            } else {
                i++;
            }
        }

        // MATH Tags: either restore the stored content or drop the span.
        i = 0;
        List<Span> mathSpans = new ArrayList<Span>();
        while (i < cepp.mathSpans.size()) {
            Span s = cepp.mathSpans.get(i);
            if (contentElementRange.hits(s)) {
                cepp.mathSpans.remove(i);

                if (showMathTagContent) {
                    mathSpans.add(s);
                    managedSpans.add(s);
                    sm.replace(s, cepp.mathStrings.remove(i));
                } else {
                    sm.delete(s);
                }
            } else {
                i++;
            }
        }

        result.setText(sm.substring(contentElementRange));

        // managed spans must be removed here and not earlier, because every
        // change in the SpanManager affects the Spans!
        sm.removeManagedList(boldSpans);
        sm.removeManagedList(italicSpans);
        sm.removeManagedList(managedSpans);

        // Make all spans relative to the start of the element's text.
        // contentElementRange is contained in managedSpans as well and is
        // shifted by the last loop, therefore the offset is read only once,
        // before any span is adjusted.
        final int adjust = -contentElementRange.getStart();
        for (Span s : boldSpans) {
            s.adjust(adjust);
        }
        for (Span s : italicSpans) {
            s.adjust(adjust);
        }
        for (Span s : managedSpans) {
            s.adjust(adjust);
        }

        result.setFormatSpans(FormatType.BOLD, boldSpans);
        result.setFormatSpans(FormatType.ITALIC, italicSpans);
        result.setFormatSpans(FormatType.TAG, tags);
        result.setFormatSpans(FormatType.MATH, mathSpans);
        result.setFormatSpans(FormatType.NOWIKI, localNoWikiSpans);

        result.setLinks(sortLinks(localLinks));
        result.setTemplates(sortTemplates(localTemplates));

        return result;
    }

    /**
     * Returns the links ordered by the start position of their spans.
     * <p>
     * The given list is not modified. A link is placed in front of all links
     * already placed that have the same start position.
     *
     * @param links
     *            the links to sort
     * @return a new list with the links in ascending order of their start
     */
    private static List<Link> sortLinks(List<Link> links) {
        final List<Link> ordered = new ArrayList<Link>();
        for (Link link : links) {
            // Find the first slot whose link does not start before this one.
            int slot;
            for (slot = 0; slot < ordered.size(); slot++) {
                if (link.getPos().getStart() <= ordered.get(slot).getPos().getStart()) {
                    break;
                }
            }
            ordered.add(slot, link);
        }
        return ordered;
    }

    /**
     * Returns the templates ordered by the start position of their spans.
     * <p>
     * The given list is not modified. A template is placed in front of all
     * templates already placed that have the same start position.
     *
     * @param templates
     *            the templates to sort
     * @return a new list with the templates in ascending order of their start
     */
    private static List<Template> sortTemplates(List<Template> templates) {
        final List<Template> ordered = new ArrayList<Template>();
        for (Template template : templates) {
            // Find the first slot whose template does not start before this one.
            int slot;
            for (slot = 0; slot < ordered.size(); slot++) {
                if (template.getPos().getStart() <= ordered.get(slot).getPos().getStart()) {
                    break;
                }
            }
            ordered.add(slot, template);
        }
        return ordered;
    }

    /**
     * Identifies the first paragraph of a ParsedPage that has real text.
     * <p>
     * The paragraph with the lowest number need not be the first one, maybe it
     * only consists of an image. Therefore the text of every paragraph is
     * examined without its templates, its tags and (if image text is shown) its
     * image texts. The number of the first paragraph that still has content
     * after removing line separators and trimming is stored in the page. If no
     * paragraph qualifies, the page is left unchanged.
     *
     * @param pp
     *            the page to examine and update
     */
    private void setFirstParagraph(ParsedPage pp) {
        final int count = pp.nrOfParagraphs();

        for (int idx = 0; idx < count; idx++) {
            final Paragraph para = pp.getParagraph(idx);

            // Work on the text of the paragraph; the spans to delete are managed,
            // so each deletion keeps the remaining spans valid.
            final SpanManager text = new SpanManager(para.getText());
            final List<Span> doomed = new ArrayList<Span>();
            text.manageList(doomed);

            // the spans of the templates, last one first
            final List<Template> templates = para.getTemplates();
            for (int k = templates.size(); k-- > 0;) {
                doomed.add(templates.get(k).getPos());
            }

            // the spans of the tags, last one first
            final List<Span> tagSpans = para.getFormatSpans(FormatType.TAG);
            for (int k = tagSpans.size(); k-- > 0;) {
                doomed.add(tagSpans.get(k));
            }

            // the spans of the image texts, last one first
            if (showImageText) {
                final List<Link> images = para.getLinks(Link.type.IMAGE);
                for (int k = images.size(); k-- > 0;) {
                    doomed.add(images.get(k).getPos());
                }
            }

            // Delete the collected spans, taking them from the end of the list.
            while (!doomed.isEmpty()) {
                text.delete(doomed.remove(doomed.size() - 1));
            }

            // Remove all line separators, so that the rest can be trimmed.
            for (int at = text.indexOf(lineSeparator); at != -1; at = text.indexOf(lineSeparator)) {
                text.delete(at, at + lineSeparator.length());
            }

            // Anything left? Then this is the first paragraph.
            if (text.toString().trim().length() != 0) {
                pp.setFirstParagraphNr(idx);
                return;
            }
        }
    }

    /**
     * Container for all the parameters needed in the parsing process.
     * <p>
     * All lists start out empty. Several of them are used in pairs where the
     * entry at index {@code i} of one list belongs to the entry at index
     * {@code i} of the other one.
     *
     * @author CJacobi
     *
     */
    class ContentElementParsingParameters {
        /** Spans of noWiki sections, parallel to {@link #noWikiStrings}. */
        List<Span> noWikiSpans = new ArrayList<Span>();
        /** Texts to put back for the noWiki sections. */
        List<String> noWikiStrings = new ArrayList<String>();
        /** Text spans of images and internal links, parallel to {@link #links}. */
        List<Span> linkSpans = new ArrayList<Span>();
        /** Images and internal links that are not assigned to an element yet. */
        List<Link> links = new ArrayList<Link>();
        /** Spans of templates, parallel to {@link #templates}. */
        List<Span> templateSpans = new ArrayList<Span>();
        /** Resolved templates that are not assigned to an element yet. */
        List<ResolvedTemplate> templates = new ArrayList<ResolvedTemplate>();
        /** Spans of HTML/XML tags. */
        List<Span> tagSpans = new ArrayList<Span>();
        /** Spans of math sections, used together with {@link #mathStrings}. */
        List<Span> mathSpans = new ArrayList<Span>();
        /** Texts to put back for the math sections. */
        List<String> mathStrings = new ArrayList<String>();

        ContentElementParsingParameters() {
            // nothing to do, every list is created by its field initialiser
        }
    }
}