org.apache.maven.doxia.DefaultConverter.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.maven.doxia.DefaultConverter.java

Source

package org.apache.maven.doxia;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import java.io.BufferedInputStream;
import java.io.CharArrayWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.io.Writer;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.maven.doxia.logging.Log;
import org.apache.maven.doxia.logging.SystemStreamLog;
import org.apache.maven.doxia.parser.ParseException;
import org.apache.maven.doxia.parser.Parser;
import org.apache.maven.doxia.sink.Sink;
import org.apache.maven.doxia.sink.SinkFactory;
import org.apache.maven.doxia.util.ConverterUtil;
import org.apache.maven.doxia.wrapper.InputFileWrapper;
import org.apache.maven.doxia.wrapper.InputReaderWrapper;
import org.apache.maven.doxia.wrapper.OutputFileWrapper;
import org.apache.maven.doxia.wrapper.OutputStreamWrapper;
import org.codehaus.plexus.ContainerConfiguration;
import org.codehaus.plexus.DefaultContainerConfiguration;
import org.codehaus.plexus.DefaultPlexusContainer;
import org.codehaus.plexus.PlexusContainer;
import org.codehaus.plexus.PlexusContainerException;
import org.codehaus.plexus.component.repository.exception.ComponentLookupException;
import org.codehaus.plexus.util.FileUtils;
import org.codehaus.plexus.util.IOUtil;
import org.codehaus.plexus.util.ReaderFactory;
import org.codehaus.plexus.util.SelectorUtils;
import org.codehaus.plexus.util.StringUtils;
import org.codehaus.plexus.util.WriterFactory;
import org.apache.commons.io.input.XmlStreamReader;
import org.codehaus.plexus.util.xml.XmlUtil;
import org.codehaus.plexus.util.xml.pull.MXParser;
import org.codehaus.plexus.util.xml.pull.XmlPullParser;
import org.codehaus.plexus.util.xml.pull.XmlPullParserException;

import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;

/**
 * Default implementation of <code>Converter</code>
 *
 * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
 * @version $Id$
 */
public class DefaultConverter implements Converter {
    // ----------------------------------------------------------------------
    // Names of the Doxia parsers, i.e. the input formats
    // ----------------------------------------------------------------------

    private static final String APT_PARSER = "apt";

    private static final String CONFLUENCE_PARSER = "confluence";

    private static final String DOCBOOK_PARSER = "docbook";

    private static final String FML_PARSER = "fml";

    private static final String TWIKI_PARSER = "twiki";

    private static final String XDOC_PARSER = "xdoc";

    private static final String XHTML_PARSER = "xhtml";

    /**
     * Supported input formats, i.e. supported Doxia parsers. This array is handed to
     * <code>ConverterUtil.getParser()</code> as the list of accepted formats and is iterated, in this order,
     * when the input format has to be auto detected.
     */
    public static final String[] SUPPORTED_FROM_FORMAT = { APT_PARSER, CONFLUENCE_PARSER, DOCBOOK_PARSER,
            FML_PARSER, TWIKI_PARSER, XDOC_PARSER, XHTML_PARSER };

    // ----------------------------------------------------------------------
    // Names of the Doxia sinks, i.e. the output formats
    // ----------------------------------------------------------------------

    private static final String APT_SINK = "apt";

    private static final String CONFLUENCE_SINK = "confluence";

    private static final String DOCBOOK_SINK = "docbook";

    private static final String FO_SINK = "fo";

    private static final String ITEXT_SINK = "itext";

    private static final String LATEX_SINK = "latex";

    private static final String RTF_SINK = "rtf";

    private static final String TWIKI_SINK = "twiki";

    private static final String XDOC_SINK = "xdoc";

    private static final String XHTML_SINK = "xhtml";

    /**
     * Supported output formats, i.e. supported Doxia sinks. This array is handed to
     * <code>ConverterUtil.getSinkFactory()</code> as the list of accepted formats.
     */
    public static final String[] SUPPORTED_TO_FORMAT = { APT_SINK, CONFLUENCE_SINK, DOCBOOK_SINK, FO_SINK,
            ITEXT_SINK, LATEX_SINK, RTF_SINK, TWIKI_SINK, XDOC_SINK, XHTML_SINK };

    /**
     * Flag to pretty-format the generated files. It is currently honoured only for the XML based
     * fo, itext, xdoc and xhtml sinks; docbook is deliberately skipped.
     */
    private boolean formatOutput;

    /**
     * Plexus container used to look up parsers and sink factories. It is created at the beginning of each
     * <code>convert()</code> call and reset to <code>null</code> at its end.
     */
    private PlexusContainer plexus;

    /** Doxia logger; stays <code>null</code> until one is set or lazily created by <code>getLog()</code>. */
    private Log log;

    /**
     * {@inheritDoc}
     * <p>
     * The given logger is stored in this converter and handed to the parsers and sinks it creates.
     * </p>
     *
     * @param log the logger to use; when it is <code>null</code>, {@link #getLog()} falls back to a new
     * <code>SystemStreamLog</code>.
     */
    public void enableLogging(Log log) {
        this.log = log;
    }

    /**
     * Gives access to the logger of this converter, creating a default one on first use.
     *
     * @return the logger set through {@link #enableLogging(Log)}, or a lazily created
     * <code>SystemStreamLog</code> when none has been configured; never <code>null</code>.
     */
    protected Log getLog() {
        if (log != null) {
            return log;
        }

        // No logger configured yet: fall back to the system streams and remember that choice.
        log = new SystemStreamLog();
        return log;
    }

    /**
     * {@inheritDoc}
     *
     * @return a fresh copy of {@link #SUPPORTED_FROM_FORMAT}, so that a caller modifying the returned array
     * cannot alter the list of input formats used by this converter.
     */
    public String[] getInputFormats() {
        return SUPPORTED_FROM_FORMAT.clone();
    }

    /**
     * {@inheritDoc}
     *
     * @return a fresh copy of {@link #SUPPORTED_TO_FORMAT}, so that a caller modifying the returned array
     * cannot alter the list of output formats used by this converter.
     */
    public String[] getOutputFormats() {
        return SUPPORTED_TO_FORMAT.clone();
    }

    /**
     * {@inheritDoc}
     * <p>
     * A Plexus container is started for the duration of the call and always stopped afterwards. When the
     * input is a regular file it is converted directly; otherwise the input is scanned with the include
     * pattern <code>**&#47;*.&lt;input format&gt;</code> (default excludes applied) and every file found is
     * converted in turn.
     * </p>
     *
     * @throws IllegalArgumentException if <code>input</code> or <code>output</code> is <code>null</code>.
     */
    @SuppressWarnings("unchecked")
    public void convert(InputFileWrapper input, OutputFileWrapper output)
            throws UnsupportedFormatException, ConverterException {
        if (input == null) {
            throw new IllegalArgumentException("input is required");
        }
        if (output == null) {
            throw new IllegalArgumentException("output is required");
        }

        try {
            startPlexusContainer();
        } catch (PlexusContainerException e) {
            throw new ConverterException("PlexusContainerException: " + e.getMessage(), e);
        }

        try {
            final File source = input.getFile();

            // Single file: nothing to scan.
            if (source.isFile()) {
                parse(source, input.getEncoding(), input.getFormat(), output);
                return;
            }

            // Anything else is scanned for files carrying the input format as extension.
            // NOTE(review): the declared format is used verbatim as the extension, including when it is
            // the auto-detection marker - confirm this is intended.
            final List<File> candidates;
            try {
                final String includes = "**/*." + input.getFormat();
                final String excludes = StringUtils.join(FileUtils.getDefaultExcludes(), ", ");
                candidates = FileUtils.getFiles(source, includes, excludes);
            } catch (IOException e) {
                throw new ConverterException("IOException: " + e.getMessage(), e);
            } catch (IllegalStateException e) {
                throw new ConverterException("IllegalStateException: " + e.getMessage(), e);
            }

            for (File candidate : candidates) {
                parse(candidate, input.getEncoding(), input.getFormat(), output);
            }
        } finally {
            stopPlexusContainer();
        }
    }

    /**
     * {@inheritDoc}
     * <p>
     * A Plexus container is started for the duration of the call and always stopped afterwards. The parser
     * and the sink are looked up from the formats declared by the wrappers, then the content of the input
     * reader is parsed into the sink. Once parsing has been attempted, the reader is closed and the sink is
     * flushed and closed.
     * </p>
     *
     * @throws IllegalArgumentException if <code>input</code> or <code>output</code> is <code>null</code>.
     */
    public void convert(InputReaderWrapper input, OutputStreamWrapper output)
            throws UnsupportedFormatException, ConverterException {
        if (input == null) {
            throw new IllegalArgumentException("input is required");
        }
        if (output == null) {
            throw new IllegalArgumentException("output is required");
        }

        try {
            startPlexusContainer();
        } catch (PlexusContainerException e) {
            throw new ConverterException("PlexusContainerException: " + e.getMessage(), e);
        }

        try {
            Parser parser;
            try {
                parser = ConverterUtil.getParser(plexus, input.getFormat(), SUPPORTED_FROM_FORMAT);
            } catch (ComponentLookupException e) {
                throw new ConverterException("ComponentLookupException: " + e.getMessage(), e);
            }
            // Go through getLog(): the raw field is still null when enableLogging() was never called.
            parser.enableLogging(getLog());

            if (getLog().isDebugEnabled()) {
                getLog().debug("Parser used: " + parser.getClass().getName());
            }

            SinkFactory sinkFactory;
            try {
                sinkFactory = ConverterUtil.getSinkFactory(plexus, output.getFormat(), SUPPORTED_TO_FORMAT);
            } catch (ComponentLookupException e) {
                throw new ConverterException("ComponentLookupException: " + e.getMessage(), e);
            }

            Sink sink;
            try {
                sink = sinkFactory.createSink(output.getOutputStream(), output.getEncoding());
            } catch (IOException e) {
                throw new ConverterException("IOException: " + e.getMessage(), e);
            }
            sink.enableLogging(getLog());

            if (getLog().isDebugEnabled()) {
                getLog().debug("Sink used: " + sink.getClass().getName());
            }

            parse(parser, input.getReader(), sink);
        } finally {
            stopPlexusContainer();
        }
    }

    /**
     * {@inheritDoc}
     * <p>
     * The flag is only read by the file based conversion: when set, generated fo, itext, xdoc and xhtml
     * files are pretty-formatted after the conversion, unless the input format is docbook.
     * </p>
     *
     * @param formatOutput <code>true</code> to pretty-format the generated XML files.
     */
    public void setFormatOutput(boolean formatOutput) {
        this.formatOutput = formatOutput;
    }

    // ----------------------------------------------------------------------
    // Private methods
    // ----------------------------------------------------------------------

    /**
     * Converts a single file: resolves the input encoding and format (auto detecting them when requested),
     * looks up the parser and the sink factory, computes the output file, runs the parser and finally
     * pretty-formats the generated XML when {@link #setFormatOutput(boolean)} was enabled.
     *
     * @param inputFile a not null existing file.
     * @param inputEncoding a supported encoding, {@link InputFileWrapper#AUTO_ENCODING} to detect it, or
     * <code>null</code> to read the file with the platform encoding.
     * @param inputFormat  a not null supported format or {@link InputFileWrapper#AUTO_FORMAT}
     * @param output not null OutputFileWrapper object
     * @throws ConverterException if any
     * @throws UnsupportedFormatException if any
     */
    private void parse(File inputFile, String inputEncoding, String inputFormat, OutputFileWrapper output)
            throws ConverterException, UnsupportedFormatException {
        if (getLog().isDebugEnabled()) {
            getLog().debug("Parsing file from '" + inputFile.getAbsolutePath() + "' with the encoding '"
                    + inputEncoding + "' to '" + output.getFile().getAbsolutePath() + "' with the encoding '"
                    + output.getEncoding() + "'");
        }

        // Constant-first comparison: a null encoding is legal and means "platform encoding" further down.
        if (InputFileWrapper.AUTO_ENCODING.equals(inputEncoding)) {
            inputEncoding = autoDetectEncoding(inputFile);
            if (getLog().isDebugEnabled()) {
                getLog().debug("Auto detect encoding: " + inputEncoding);
            }
        }

        if (inputFormat.equals(InputFileWrapper.AUTO_FORMAT)) {
            inputFormat = autoDetectFormat(inputFile, inputEncoding);
            if (getLog().isDebugEnabled()) {
                getLog().debug("Auto detect input format: " + inputFormat);
            }
        }

        Parser parser;
        try {
            parser = ConverterUtil.getParser(plexus, inputFormat, SUPPORTED_FROM_FORMAT);
        } catch (ComponentLookupException e) {
            throw new ConverterException("ComponentLookupException: " + e.getMessage(), e);
        }
        // Go through getLog(): the raw field is still null when enableLogging() was never called.
        parser.enableLogging(getLog());

        File outputFile = resolveOutputFile(inputFile, output);

        Reader reader = openReader(parser, inputFile, inputEncoding);

        OutputStream out = null;
        Sink sink = null;
        try {
            SinkFactory sinkFactory =
                    ConverterUtil.getSinkFactory(plexus, output.getFormat(), SUPPORTED_TO_FORMAT);

            String outputEncoding;
            if (StringUtils.isEmpty(output.getEncoding())
                    || output.getEncoding().equals(OutputFileWrapper.AUTO_ENCODING)) {
                outputEncoding = inputEncoding;
            } else {
                outputEncoding = output.getEncoding();
            }

            out = new FileOutputStream(outputFile);
            sink = sinkFactory.createSink(out, outputEncoding);
        } catch (ComponentLookupException e) {
            throw new ConverterException("ComponentLookupException: " + e.getMessage(), e);
        } catch (IOException e) {
            throw new ConverterException("IOException: " + e.getMessage(), e);
        } finally {
            if (sink == null) {
                // No sink was created, so nobody else will ever close these: release them here.
                IOUtil.close(reader);
                IOUtil.close(out);
            }
        }

        sink.enableLogging(getLog());

        if (getLog().isDebugEnabled()) {
            getLog().debug("Sink used: " + sink.getClass().getName());
        }

        // Closes the reader and the sink, whatever the outcome.
        parse(parser, reader, sink);

        // format all xml files excluding docbook which is buggy
        // TODO Add doc book format
        String outputFormat = output.getFormat();
        boolean formattableOutput = FO_SINK.equals(outputFormat) || ITEXT_SINK.equals(outputFormat)
                || XDOC_SINK.equals(outputFormat) || XHTML_SINK.equals(outputFormat);
        if (formatOutput && formattableOutput && !DOCBOOK_PARSER.equals(inputFormat)) {
            prettyFormatXml(outputFile);
        }
    }

    /**
     * Computes the file to write for the given input and creates the directories it needs.
     *
     * @param inputFile not null, only its name is used.
     * @param output not null, its file is either an existing directory, a name without extension (taken as
     * a directory to create) or the output file itself.
     * @return the file to write: <code>&lt;input name&gt;.&lt;output format&gt;</code> inside the output
     * directory, or the output file itself.
     */
    private static File resolveOutputFile(File inputFile, OutputFileWrapper output) {
        File target = output.getFile();

        boolean existingDirectory = target.exists() && target.isDirectory();
        if (existingDirectory || !SelectorUtils.match("**.*", target.getName())) {
            if (!existingDirectory) {
                // no extension: assume it is a directory
                target.mkdirs();
            }
            return new File(target, inputFile.getName() + "." + output.getFormat());
        }

        // A bare relative file name such as "out.xhtml" has no parent directory to create.
        File parent = target.getParentFile();
        if (parent != null) {
            parent.mkdirs();
        }
        return target;
    }

    /**
     * Opens the input file with the reader suited to the parser type and the encoding.
     *
     * @param parser not null
     * @param inputFile not null
     * @param inputEncoding the encoding to use, or <code>null</code> for the platform encoding; it is
     * not passed to the XML reader.
     * @return a new reader on <code>inputFile</code>, to be closed by the caller.
     * @throws ConverterException if the file could not be opened.
     */
    private static Reader openReader(Parser parser, File inputFile, String inputEncoding)
            throws ConverterException {
        try {
            if (inputEncoding == null) {
                return ReaderFactory.newPlatformReader(inputFile);
            }
            if (parser.getType() == Parser.XML_TYPE) {
                return ReaderFactory.newXmlReader(inputFile);
            }
            return ReaderFactory.newReader(inputFile, inputEncoding);
        } catch (IOException e) {
            throw new ConverterException("IOException: " + e.getMessage(), e);
        }
    }

    /**
     * Rewrites the given XML file in place with a pretty-formatted version of its content.
     *
     * @param xmlFile not null existing XML file.
     * @throws ConverterException if the file could not be read or written.
     */
    private static void prettyFormatXml(File xmlFile) throws ConverterException {
        Reader r = null;
        Writer w = null;
        try {
            r = ReaderFactory.newXmlReader(xmlFile);
            CharArrayWriter caw = new CharArrayWriter();
            XmlUtil.prettyFormat(r, caw);

            // The formatted content is buffered in caw: release the read handle before the same file is
            // reopened for writing.
            IOUtil.close(r);
            r = null;

            w = WriterFactory.newXmlWriter(xmlFile);
            w.write(caw.toString());
        } catch (IOException e) {
            throw new ConverterException("IOException: " + e.getMessage(), e);
        } finally {
            IOUtil.close(r);
            IOUtil.close(w);
        }
    }

    /**
     * Runs the parser on the reader, emitting the events into the sink. The reader is always closed and the
     * sink is always flushed then closed, whether parsing succeeded or not.
     *
     * @param parser not null
     * @param reader not null
     * @param sink not null
     * @throws ConverterException if the parser reports a <code>ParseException</code>.
     */
    private void parse(Parser parser, Reader reader, Sink sink) throws ConverterException {
        try {
            parser.parse(reader, sink);
        } catch (ParseException e) {
            throw new ConverterException("ParseException: " + e.getMessage(), e);
        } finally {
            IOUtil.close(reader);
            try {
                sink.flush();
            } finally {
                // Close the sink even when flushing fails.
                sink.close();
            }
        }
    }

    /**
     * Creates the Plexus container named "Doxia" unless one is already available. Its context holds a
     * single <code>basedir</code> entry set to the absolute path of the current working directory.
     *
     * @throws PlexusContainerException if the container could not be created.
     */
    private void startPlexusContainer() throws PlexusContainerException {
        if (plexus == null) {
            final Map<String, String> plexusContext = new HashMap<String, String>();
            plexusContext.put("basedir", new File("").getAbsolutePath());

            final ContainerConfiguration configuration = new DefaultContainerConfiguration();
            configuration.setName("Doxia");
            configuration.setContext(plexusContext);

            plexus = new DefaultPlexusContainer(configuration);
        }
    }

    /**
     * Disposes the Plexus container, if any, and forgets it so that the next conversion starts a new one.
     * The reference is cleared even when disposing fails, otherwise a half disposed container would be
     * reused by the next call to <code>startPlexusContainer()</code>.
     */
    private void stopPlexusContainer() {
        if (plexus == null) {
            return;
        }

        try {
            plexus.dispose();
        } finally {
            plexus = null;
        }
    }

    /**
     * Detects the encoding of the given file: XML files are asked for the encoding reported by
     * <code>XmlStreamReader</code>, any other file is analysed by the ICU <code>CharsetDetector</code>.
     *
     * @param f not null file
     * @return the detected encoding for f; for text files, the upper-cased name of the best charset match.
     * @throws IllegalArgumentException if f is not a file.
     * @throws UnsupportedOperationException if could not detect the file encoding, i.e. the file could not
     * be read (the <code>IOException</code> is then attached as the cause) or no charset matched.
     * @see XmlStreamReader#getEncoding()
     * @see CharsetDetector#detect()
     */
    private static String autoDetectEncoding(File f) {
        if (!f.isFile()) {
            throw new IllegalArgumentException(
                    "The file '" + f.getAbsolutePath() + "' is not a file, could not detect encoding.");
        }

        IOException failure = null;
        Reader reader = null;
        InputStream is = null;
        try {
            if (XmlUtil.isXml(f)) {
                XmlStreamReader xmlReader = new XmlStreamReader(f);
                reader = xmlReader;
                return xmlReader.getEncoding();
            }

            is = new BufferedInputStream(new FileInputStream(f));
            CharsetDetector detector = new CharsetDetector();
            detector.setText(is);
            CharsetMatch match = detector.detect();

            // detect() yields null when no charset matches the content: report it as "not detected" below
            // instead of failing with a NullPointerException.
            if (match != null && match.getName() != null) {
                return match.getName().toUpperCase(Locale.ENGLISH);
            }
        } catch (IOException e) {
            // Reported below, as the cause of the UnsupportedOperationException.
            failure = e;
        } finally {
            IOUtil.close(reader);
            IOUtil.close(is);
        }

        StringBuilder msg = new StringBuilder();
        msg.append("Could not detect the encoding for file: ");
        msg.append(f.getAbsolutePath());
        msg.append("\n Specify explicitly the encoding.");
        throw new UnsupportedOperationException(msg.toString(), failure);
    }

    /**
     * Auto detect Doxia format for the given file depending:
     * <ul>
     * <li>the file name for TextMarkup based Doxia files (apt, confluence, twiki)</li>
     * <li>the first tag of the file content for XMLMarkup based Doxia files: <code>article</code> for
     * docbook, <code>faqs</code> for fml, <code>document</code> for xdoc and <code>html</code> for xhtml</li>
     * </ul>
     * The supported formats are tried in the order of {@link #SUPPORTED_FROM_FORMAT} and the first one
     * matching wins.
     *
     * @param f not null file
     * @param encoding a not null encoding; currently not used by the detection.
     * @return the detected format from f.
     * @throws IllegalArgumentException if f is not a file.
     * @throws UnsupportedOperationException if could not detect the Doxia format.
     */
    private static String autoDetectFormat(File f, String encoding) {
        if (!f.isFile()) {
            throw new IllegalArgumentException(
                    "The file '" + f.getAbsolutePath() + "' is not a file, could not detect format.");
        }

        // The first tag does not depend on the candidate format: parse the file at most once, and only
        // when a candidate actually needs it.
        String firstTag = null;
        boolean firstTagRead = false;

        for (String supportedFromFormat : SUPPORTED_FROM_FORMAT) {
            // Handle Doxia text files
            boolean textFormat = supportedFromFormat.equalsIgnoreCase(APT_PARSER)
                    || supportedFromFormat.equalsIgnoreCase(CONFLUENCE_PARSER)
                    || supportedFromFormat.equalsIgnoreCase(TWIKI_PARSER);
            if (textFormat && isDoxiaFileName(f, supportedFromFormat)) {
                return supportedFromFormat;
            }

            // Handle Doxia xml files
            if (!firstTagRead) {
                firstTag = getFirstTag(f);
                firstTagRead = true;
            }
            if (firstTag == null) {
                continue;
            }

            if (firstTag.equals("article") && supportedFromFormat.equalsIgnoreCase(DOCBOOK_PARSER)) {
                return supportedFromFormat;
            } else if (firstTag.equals("faqs") && supportedFromFormat.equalsIgnoreCase(FML_PARSER)) {
                return supportedFromFormat;
            } else if (firstTag.equals("document") && supportedFromFormat.equalsIgnoreCase(XDOC_PARSER)) {
                return supportedFromFormat;
            } else if (firstTag.equals("html") && supportedFromFormat.equalsIgnoreCase(XHTML_PARSER)) {
                return supportedFromFormat;
            }
        }

        StringBuilder msg = new StringBuilder();
        msg.append("Could not detect the Doxia format for file: ");
        msg.append(f.getAbsolutePath());
        msg.append("\n Specify explicitly the Doxia format.");
        throw new UnsupportedOperationException(msg.toString());
    }

    /**
     * Tells whether the name of the given file ends with the given format used as extension, ignoring the
     * case, e.g. <code>index.apt</code> or <code>INDEX.APT</code> for the <code>apt</code> format.
     * The format is compared literally, it is never interpreted as a regular expression.
     *
     * @param f not null
     * @param format could be null
     * @return <code>true</code> if the file name ends with <code>.&lt;format&gt;</code>, <code>false</code>
     * otherwise or if <code>format</code> is <code>null</code>.
     * @throws IllegalArgumentException if f is <code>null</code>.
     */
    private static boolean isDoxiaFileName(File f, String format) {
        if (f == null) {
            throw new IllegalArgumentException("f is required.");
        }
        if (format == null) {
            // Nothing to compare with: the file name cannot denote an unknown format.
            return false;
        }

        String fileName = f.getName().toLowerCase(Locale.ENGLISH);
        String extension = "." + format.toLowerCase(Locale.ENGLISH);

        return fileName.endsWith(extension);
    }

    /**
     * Reads the given file with an XML pull parser until the first start tag is met.
     *
     * @param xmlFile not null and should be a file.
     * @return the name of the first start tag, or <code>null</code> when the document ends without any tag
     * or when the file could not be read or parsed as XML.
     * @throws IllegalArgumentException if xmlFile is <code>null</code> or is not a file.
     */
    private static String getFirstTag(File xmlFile) {
        if (xmlFile == null) {
            throw new IllegalArgumentException("xmlFile is required.");
        }
        if (!xmlFile.isFile()) {
            throw new IllegalArgumentException("The file '" + xmlFile.getAbsolutePath() + "' is not a file.");
        }

        Reader xmlReader = null;
        try {
            xmlReader = ReaderFactory.newXmlReader(xmlFile);
            XmlPullParser pullParser = new MXParser();
            pullParser.setInput(xmlReader);

            for (int event = pullParser.getEventType(); event != XmlPullParser.END_DOCUMENT;
                    event = pullParser.nextToken()) {
                if (event == XmlPullParser.START_TAG) {
                    return pullParser.getName();
                }
            }

            // End of document reached without any tag.
            return null;
        } catch (XmlPullParserException e) {
            // Not a well formed XML file: there is no first tag to report.
            return null;
        } catch (IOException e) {
            // Unreadable or missing file (FileNotFoundException included): no first tag to report.
            return null;
        } finally {
            IOUtil.close(xmlReader);
        }
    }
}