org.dita.dost.writer.SeparateChunkTopicParser.java Source code

Introduction

Here is the source code for org.dita.dost.writer.SeparateChunkTopicParser.java
Source

/*
 * This file is part of the DITA Open Toolkit project.
 *
 * Copyright 2007 IBM Corporation
 *
 * See the accompanying LICENSE file for applicable license.
    
 */
package org.dita.dost.writer;

import org.dita.dost.exception.DITAOTException;
import org.dita.dost.exception.DITAOTXMLErrorHandler;
import org.dita.dost.util.Job.FileInfo;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;

import javax.xml.parsers.DocumentBuilder;
import java.io.*;
import java.net.URI;
import java.util.ArrayDeque;
import java.util.Collection;
import java.util.Deque;
import java.util.Locale;
import java.util.Stack;

import static org.apache.commons.io.FileUtils.deleteQuietly;
import static org.apache.commons.io.FileUtils.moveFile;
import static org.dita.dost.module.GenMapAndTopicListModule.ELEMENT_STUB;
import static org.dita.dost.reader.ChunkMapReader.*;
import static org.dita.dost.util.Constants.*;
import static org.dita.dost.util.StringUtils.split;
import static org.dita.dost.util.URLUtils.*;
import static org.dita.dost.util.XMLUtils.*;

/**
 * Split topic into multiple files for {@code by-topic} chunking.
 * Not reusable and not thread-safe.
 */
public final class SeparateChunkTopicParser extends AbstractChunkTopicParser {

    // SAX reader used to parse the topic file; this parser is registered as
    // its content handler in the constructor
    private final XMLReader reader;
    // stub is used as the anchor to mark where to insert generated child
    // topicref inside current topicref
    private Element stub;
    // siblingStub is similar to stub. The only difference is that it is used
    // to insert a generated topicref as a sibling of the current topicref
    private Element siblingStub;
    // Output files of the enclosing topics, saved while a nested topic is
    // written to a file of its own
    private final Deque<URI> outputFileNameStack = new ArrayDeque<>();
    // Root element of the DOM-parsed topic file; stays null when the file was
    // not parsed into a DOM or parsing failed
    private Element topicDoc = null;
    // Writers of the enclosing topics, pushed and popped together with
    // outputFileNameStack
    final Deque<Writer> outputStack = new ArrayDeque<>();
    // Stubs of the enclosing topicrefs, restored when a nested topic ends
    final Deque<Element> stubStack = new ArrayDeque<>();

    /**
     * Constructor.
     */
    public SeparateChunkTopicParser() {
        super();
        try {
            reader = getXMLReader();
            reader.setContentHandler(this);
            reader.setFeature(FEATURE_NAMESPACE_PREFIX, true);
        } catch (final Exception e) {
            throw new RuntimeException("Failed to initialize XML parser: " + e.getMessage(), e);
        }
    }

    /**
     * Split the topic file referenced by the root topicref into separate
     * output files and re-point the topicref at the generated file.
     *
     * <p>The file to parse is taken from {@code copy-to} when present (keeping
     * the fragment of {@code href}), otherwise from {@code href}. The select
     * method is derived from the fragment and the {@code chunk} tokens:
     * a fragment with select-branch selects a branch, no fragment or
     * select-document selects the whole document, and any other fragment
     * selects a single topic.</p>
     *
     * <p>Checked failures raised while splitting are logged and not
     * propagated; runtime exceptions are rethrown. In both cases the stub
     * markers are removed from the map and every writer opened for the split
     * is closed.</p>
     *
     * @param currentFile absolute URI of the file being processed; the
     *                    topicref's {@code href}/{@code copy-to} values are
     *                    resolved against it
     * @throws DITAOTException declared by the overridden method
     */
    @Override
    public void write(final URI currentFile) throws DITAOTException {
        this.currentFile = currentFile;
        final URI hrefValue = toURI(getValue(rootTopicref, ATTRIBUTE_NAME_HREF));
        final URI copytoValue = toURI(getValue(rootTopicref, ATTRIBUTE_NAME_COPY_TO));
        final Collection<String> chunkValue = split(getValue(rootTopicref, ATTRIBUTE_NAME_CHUNK));
        boolean dotchunk = false;
        // Output file of the root topic. The outputFile field is re-pointed
        // while nested topics are written, so it cannot be trusted after an
        // aborted parse.
        URI rootOutputFile = null;
        boolean completed = false;

        // Chimera path, has fragment
        final URI parseFilePath;
        if (copytoValue != null) {
            if (hrefValue != null && hrefValue.getFragment() != null) {
                parseFilePath = setFragment(copytoValue, hrefValue.getFragment());
            } else {
                parseFilePath = copytoValue;
            }
        } else {
            parseFilePath = hrefValue;
        }

        try {
            // if the path to target file make sense
            final URI parseTarget = currentFile.resolve(parseFilePath);
            currentParsingFile = parseTarget;
            URI outputFileName;
            /*
             * FIXME: we have code flaws here, references in ditamap need to
             * be updated to new created file.
             */
            final String id = parseFilePath.getFragment();
            String firstTopicID = null;
            if (id != null && chunkValue.contains(CHUNK_SELECT_BRANCH)) {
                outputFileName = resolve(currentFile, id + FILE_EXTENSION_DITA);
                targetTopicId = id;
                startFromFirstTopic = false;
                selectMethod = CHUNK_SELECT_BRANCH;
            } else if (id == null || chunkValue.contains(CHUNK_SELECT_DOCUMENT)) {
                // Whole document: name the output after the first topic id,
                // or fall back to a ".chunk" sibling that replaces the
                // source file once writing has finished
                firstTopicID = getFirstTopicId(parseTarget.getPath());

                topicDoc = getTopicDoc(parseTarget);

                if (firstTopicID != null) {
                    outputFileName = resolve(currentFile, firstTopicID + FILE_EXTENSION_DITA);
                    targetTopicId = firstTopicID;
                } else {
                    outputFileName = resolve(currentParsingFile, null);
                    dotchunk = true;
                    targetTopicId = null;
                }
                selectMethod = CHUNK_SELECT_DOCUMENT;
            } else {
                outputFileName = resolve(currentFile, id + FILE_EXTENSION_DITA);
                targetTopicId = id;
                startFromFirstTopic = false;
                selectMethod = CHUNK_SELECT_TOPIC;
            }
            if (copytoValue != null) {
                // use @copy-to value as the new file name
                outputFileName = resolve(currentFile, copytoValue.toString());
            }

            if (new File(outputFileName).exists()) {
                // Name already taken: write to a generated name and remember
                // which name it stands in for
                final URI t = outputFileName;
                outputFileName = resolve(currentFile, generateFilename());
                conflictTable.put(outputFileName, t);
                dotchunk = false;
            }
            output = new OutputStreamWriter(new FileOutputStream(new File(outputFileName)), UTF8);
            outputFile = outputFileName;
            rootOutputFile = outputFileName;

            if (!dotchunk) {
                final FileInfo fi = generateFileInfo(outputFile);
                job.add(fi);

                changeTable.put(parseTarget, setFragment(outputFileName, id));
                // new generated file
                changeTable.put(outputFileName, outputFileName);
            }

            // change the href value
            final URI newHref = setFragment(
                    getRelativePath(currentFile.resolve(FILE_NAME_STUB_DITAMAP), outputFileName),
                    firstTopicID != null ? firstTopicID : id);
            rootTopicref.setAttribute(ATTRIBUTE_NAME_HREF, newHref.toString());

            include = false;

            addStubElements();

            // Place siblingStub
            if (rootTopicref.getNextSibling() != null) {
                rootTopicref.getParentNode().insertBefore(siblingStub, rootTopicref.getNextSibling());
            } else {
                rootTopicref.getParentNode().appendChild(siblingStub);
            }

            reader.setErrorHandler(new DITAOTXMLErrorHandler(currentParsingFile.getPath(), logger));
            logger.info("Processing " + currentParsingFile);
            reader.parse(currentParsingFile.toString());
            output.flush();

            removeStubElements();
            completed = true;
        } catch (final RuntimeException e) {
            throw e;
        } catch (final Exception e) {
            logger.error(e.getMessage(), e);
        } finally {
            if (!completed) {
                discardStubsAndWritersAfterFailure();
            }
            try {
                if (output != null) {
                    output.close();
                    output = null;
                    if (dotchunk && rootOutputFile != null) {
                        // Replace the source file with the ".chunk" output
                        final File dst = new File(currentParsingFile);
                        final File src = new File(rootOutputFile);
                        logger.debug("Delete " + currentParsingFile);
                        deleteQuietly(dst);
                        logger.debug("Move " + rootOutputFile + " to " + currentParsingFile);
                        moveFile(src, dst);
                        final FileInfo fi = job.getFileInfo(rootOutputFile);
                        if (fi != null) {
                            job.remove(fi);
                        }
                    }
                }
            } catch (final Exception ex) {
                logger.error(ex.getMessage(), ex);
            }
        }
    }

    /**
     * Release the transient state left behind when splitting is aborted:
     * detach every stub marker that is still attached to the map and close
     * the writers of enclosing topics that are still waiting on the stack.
     * The writer currently held in {@code output} is left for the caller.
     */
    private void discardStubsAndWritersAfterFailure() {
        detachStubNode(stub);
        while (!stubStack.isEmpty()) {
            detachStubNode(stubStack.pop());
        }
        detachStubNode(siblingStub);

        while (!outputStack.isEmpty()) {
            try {
                outputStack.pop().close();
            } catch (final IOException e) {
                logger.error(e.getMessage(), e);
            }
        }
        outputFileNameStack.clear();
    }

    /**
     * Remove a node from its parent; does nothing for {@code null} or
     * already detached nodes.
     */
    private static void detachStubNode(final Node node) {
        if (node != null && node.getParentNode() != null) {
            node.getParentNode().removeChild(node);
        }
    }

    /**
     * Create the {@code stub} and {@code siblingStub} marker elements and
     * attach {@code stub} inside the root topicref: directly after the
     * topicref's own topicmeta child when it has one, otherwise as the first
     * child. {@code siblingStub} is only created here, not attached.
     */
    private void addStubElements() {
        final Document mapDoc = rootTopicref.getOwnerDocument();
        stub = mapDoc.createElement(ELEMENT_STUB);
        siblingStub = mapDoc.createElement(ELEMENT_STUB);

        // Look at direct children only. A descendant-wide search would also
        // find the topicmeta of a nested topicref and attach the stub inside
        // that nested topicref instead of the root topicref.
        Node topicmeta = null;
        for (Node child = rootTopicref.getFirstChild(); child != null; child = child.getNextSibling()) {
            if (child.getNodeType() == Node.ELEMENT_NODE
                    && MAP_TOPICMETA.localName.equals(child.getNodeName())) {
                topicmeta = child;
                break;
            }
        }

        // insertBefore with a null reference node appends, which covers both
        // a trailing topicmeta and a topicref without any children
        if (topicmeta != null) {
            rootTopicref.insertBefore(stub, topicmeta.getNextSibling());
        } else {
            // no topicmeta tag.
            rootTopicref.insertBefore(stub, rootTopicref.getFirstChild());
        }
    }

    /**
     * Detach both marker elements, {@code stub} first and then
     * {@code siblingStub}, from their current parents.
     */
    private void removeStubElements() {
        for (final Element marker : new Element[] {stub, siblingStub}) {
            marker.getParentNode().removeChild(marker);
        }
    }

    /**
     * Parse a topic file into a DOM tree and hand back its root element.
     *
     * @param absolutePathToFile URI of the topic file to parse
     * @return root element of the parsed document, or {@code null} when the
     *         file could not be read or parsed (the failure is logged)
     */
    private Element getTopicDoc(final URI absolutePathToFile) {
        final DocumentBuilder domBuilder = getDocumentBuilder();
        Element root = null;
        try {
            final Document parsed = domBuilder.parse(absolutePathToFile.toString());
            root = parsed.getDocumentElement();
        } catch (final SAXException | IOException failure) {
            logger.error("Failed to parse " + absolutePathToFile + ": " + failure.getMessage(), failure);
        }
        return root;
    }

    /**
     * Work out the temporary-directory URI of a file that is generated next
     * to {@code base}, registering it with the job when it is not known yet.
     *
     * @param base absolute URI of an existing file inside the job's temporary directory
     * @param file name to resolve against the result URI of {@code base}, or
     *             {@code null} to use the result path of {@code base} with
     *             {@code FILE_EXTENSION_CHUNK} appended
     * @return absolute URI of the generated file inside the temporary directory
     */
    private URI resolve(final URI base, final String file) {
        assert base.isAbsolute();
        assert base.toString().startsWith(job.tempDirURI.toString());

        final FileInfo baseInfo = job.getFileInfo(base);
        final URI target = file == null
                ? setPath(baseInfo.result, baseInfo.result.getPath() + FILE_EXTENSION_CHUNK)
                : baseInfo.result.resolve(file);
        final URI tempName = tempFileNameScheme.generateTempFileName(target);

        // Register the new file, derived from the base file's info, only once
        final boolean unknownToJob = job.getFileInfo(tempName) == null;
        if (unknownToJob) {
            job.add(new FileInfo.Builder(baseInfo).result(target).uri(tempName).build());
        }

        return job.tempDirURI.resolve(tempName);
    }

    // SAX methods

    /**
     * Handle the start of an element of the topic file being split.
     *
     * <p>For a topic element met while content is being included (and the
     * select method is not select-topic), the current writer and output file
     * are pushed on their stacks, a new output file is opened and given an
     * XML header plus workdir processing instructions, and a generated
     * topicref pointing at the new file is inserted into the map before the
     * appropriate stub. Every included element is then written to the
     * current output.</p>
     *
     * <p>I/O failures are logged and not propagated.</p>
     *
     * @param uri       namespace URI (not used)
     * @param localName local name (not used)
     * @param qName     qualified element name, written to the output
     * @param atts      element attributes; {@code class} and {@code id} are inspected
     * @throws SAXException if writing the element or processing instructions fails
     */
    @Override
    public void startElement(final String uri, final String localName, final String qName, final Attributes atts)
            throws SAXException {
        final String cls = atts.getValue(ATTRIBUTE_NAME_CLASS);
        final String id = atts.getValue(ATTRIBUTE_NAME_ID);

        // Track the depth inside a skipped subtree; endElement counts it
        // back down and resumes including once it reaches zero
        if (skip && skipLevel > 0) {
            skipLevel++;
        }

        try {
            if (TOPIC_TOPIC.matches(cls)) {
                topicSpecSet.add(qName);

                if (include && !CHUNK_SELECT_TOPIC.equals(selectMethod)) {
                    // chunk="by-topic" and next topic element found
                    outputStack.push(output);
                    outputFileNameStack.push(outputFile);

                    outputFile = generateOutputFilename(id);
                    output = new OutputStreamWriter(new FileOutputStream(new File(outputFile)), UTF8);

                    // Unlike in write(), generateFileInfo is not called for
                    // the new file here.

                    changeTable.put(outputFile, outputFile);
                    if (id != null) {
                        changeTable.put(setFragment(currentParsingFile, id), setFragment(outputFile, id));
                    } else {
                        changeTable.put(currentParsingFile, outputFile);
                    }

                    // write xml header and workdir PI to the new generated file
                    writeStartDocument(output);
                    // Locale.ROOT keeps the OS check independent of the
                    // default locale (e.g. Turkish dotless-i lower-casing)
                    if (!OS_NAME.toLowerCase(Locale.ROOT).contains(OS_NAME_WINDOWS)) {
                        writeProcessingInstruction(output, PI_WORKDIR_TARGET,
                                new File(currentFile).getParentFile().getAbsolutePath());
                    } else {
                        writeProcessingInstruction(output, PI_WORKDIR_TARGET,
                                UNIX_SEPARATOR + currentFile.resolve("."));
                    }
                    writeProcessingInstruction(output, PI_WORKDIR_TARGET_URI, currentFile.resolve(".").toString());

                    // create a new child element in separate case topicref is equals to parameter
                    // element in separateChunk(Element element)
                    final Element newTopicref = rootTopicref.getOwnerDocument()
                            .createElement(MAP_TOPICREF.localName);
                    newTopicref.setAttribute(ATTRIBUTE_NAME_CLASS, MAP_TOPICREF.toString());
                    newTopicref.setAttribute(ATTRIBUTE_NAME_XTRF, ATTR_XTRF_VALUE_GENERATED);
                    newTopicref.setAttribute(ATTRIBUTE_NAME_HREF,
                            getRelativePath(currentFile.resolve(FILE_NAME_STUB_DITAMAP), outputFile).toString());

                    final Element topic = searchForNode(topicDoc, id, ATTRIBUTE_NAME_ID, TOPIC_TOPIC);
                    final Element topicmeta = createTopicMeta(topic);
                    newTopicref.appendChild(topicmeta);

                    if (stub != null) {
                        if (includelevel == 0 && siblingStub != null) {
                            // if it is the following sibling topic to the first topic in ditabase
                            // The first topic will not enter the logic at here because when meeting
                            // with first topic in ditabase, the include value is false
                            siblingStub.getParentNode().insertBefore(newTopicref, siblingStub);
                        } else {
                            stub.getParentNode().insertBefore(newTopicref, stub);
                        }
                        // Nested topics are anchored on a fresh stub inside
                        // the generated topicref; the previous stub is
                        // restored by endElement
                        stubStack.push(stub);
                        stub = (Element) stub.cloneNode(false);
                        newTopicref.appendChild(stub);
                    }
                }

                processSelect(id);
            }

            if (include) {
                includelevel++;
                final Attributes resAtts = processAttributes(atts);
                writeStartElement(output, qName, resAtts);
            }
        } catch (final IOException e) {
            logger.error(e.getMessage(), e);
        }
    }

    /**
     * Handle the end of an element of the topic file being split.
     *
     * <p>Leaves a skipped subtree when its depth counter has run out, writes
     * the end tag of included elements and, when a topic that was given its
     * own output file ends, closes that file and restores the writer, output
     * file and stub of the enclosing topic.</p>
     *
     * @param uri       namespace URI (not used)
     * @param localName local name (not used)
     * @param qName     qualified element name, written to the output
     * @throws SAXException if writing the end tag fails
     */
    @Override
    public void endElement(final String uri, final String localName, final String qName) throws SAXException {
        if (skip) {
            if (skipLevel > 0) {
                skipLevel--;
            } else {
                // End of the skipped subtree: resume including content
                include = true;
                skip = false;
                skipLevel = 0;
            }
        }

        if (!include) {
            return;
        }

        includelevel--;
        // prevent adding </dita> into output
        if (includelevel >= 0) {
            writeEndElement(output, qName);
        }
        if (includelevel == 0 && !CHUNK_SELECT_DOCUMENT.equals(selectMethod)) {
            include = false;
        }

        // if it is end of topic and separate is true
        final boolean separateTopicEnds = topicSpecSet.contains(qName) && !outputStack.isEmpty();
        if (!separateTopicEnds) {
            return;
        }

        try {
            output.close();
        } catch (final IOException closeFailure) {
            logger.error(closeFailure.getMessage(), closeFailure);
        }
        output = outputStack.pop();
        outputFile = outputFileNameStack.pop();
        stub.getParentNode().removeChild(stub);
        stub = stubStack.pop();
    }

}