com.norconex.importer.ImporterConfig.java Source code

Java tutorial

Introduction

Here is the source code for com.norconex.importer.ImporterConfig.java

Source

/* Copyright 2010-2015 Norconex Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.norconex.importer;

import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.io.StringWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;

import javax.xml.stream.XMLStreamException;

import org.apache.commons.configuration.HierarchicalConfiguration;
import org.apache.commons.configuration.XMLConfiguration;
import org.apache.commons.configuration.tree.ExpressionEngine;
import org.apache.commons.configuration.tree.xpath.XPathExpressionEngine;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

import com.norconex.commons.lang.config.ConfigurationException;
import com.norconex.commons.lang.config.ConfigurationUtil;
import com.norconex.commons.lang.config.IXMLConfigurable;
import com.norconex.commons.lang.unit.DataUnit;
import com.norconex.commons.lang.xml.EnhancedXMLStreamWriter;
import com.norconex.importer.handler.IImporterHandler;
import com.norconex.importer.parser.GenericDocumentParserFactory;
import com.norconex.importer.parser.IDocumentParserFactory;
import com.norconex.importer.response.IImporterResponseProcessor;

/**
 * Importer configuration.
 * <p>
 * Holds the document parser factory, the pre-parse and post-parse handlers,
 * the response processors, and the temp directory, cache size and parse error
 * directory settings. The configuration can be read from XML with
 * {@link #loadFromXML(Reader)} and written back with
 * {@link #saveToXML(Writer)}.
 * </p>
 * @author Pascal Essiembre
 */
public class ImporterConfig implements IXMLConfigurable {

    private static final Logger LOG = LogManager.getLogger(ImporterConfig.class);

    /** Default temp directory path, as reported by {@link FileUtils#getTempDirectoryPath()}. */
    public static final String DEFAULT_TEMP_DIR_PATH = FileUtils.getTempDirectoryPath();
    /** Default maximum file cache size: 10 MB converted to bytes. */
    public static final int DEFAULT_MAX_FILE_CACHE_SIZE = (int) DataUnit.MB.toBytes(10);
    /** Default maximum file pool cache size: 100 MB converted to bytes. */
    public static final int DEFAULT_MAX_FILE_POOL_CACHE_SIZE = (int) DataUnit.MB.toBytes(100);

    /**
     * Fallback tag for a handler that is not {@link IXMLConfigurable}. Any
     * child element of the handler list is accepted when loading, so the
     * exact name only has to be a valid XML name.
     */
    private static final String HANDLER_TAG = "handler";
    /**
     * Fallback tag for a response processor that is not
     * {@link IXMLConfigurable}. Matches the element name read by
     * {@link #loadFromXML(Reader)}.
     */
    private static final String RESPONSE_PROCESSOR_TAG = "responseProcessor";

    private IDocumentParserFactory documentParserFactory = new GenericDocumentParserFactory();

    private IImporterHandler[] preParseHandlers;
    private IImporterHandler[] postParseHandlers;
    private IImporterResponseProcessor[] responseProcessors;

    private File tempDir = new File(DEFAULT_TEMP_DIR_PATH);
    private int maxFileCacheSize = DEFAULT_MAX_FILE_CACHE_SIZE;
    private int maxFilePoolCacheSize = DEFAULT_MAX_FILE_POOL_CACHE_SIZE;
    private File parseErrorsSaveDir;

    /**
     * Gets the document parser factory.
     * Default is a {@link GenericDocumentParserFactory} instance.
     * @return document parser factory
     */
    public IDocumentParserFactory getParserFactory() {
        return documentParserFactory;
    }

    /**
     * Sets the document parser factory.
     * @param parserFactory document parser factory
     */
    public void setParserFactory(IDocumentParserFactory parserFactory) {
        this.documentParserFactory = parserFactory;
    }

    /**
     * Gets the temporary directory.
     * Default is {@link #DEFAULT_TEMP_DIR_PATH}.
     * @return temporary directory
     */
    public File getTempDir() {
        return tempDir;
    }

    /**
     * Sets the temporary directory.
     * @param tempDir temporary directory
     */
    public void setTempDir(File tempDir) {
        this.tempDir = tempDir;
    }

    /**
     * Gets the directory where file generating parsing errors will be saved.
     * Default is <code>null</code> (not storing errors).
     * @return directory where to save error files
     */
    public File getParseErrorsSaveDir() {
        return parseErrorsSaveDir;
    }

    /**
     * Sets the directory where file generating parsing errors will be saved.
     * @param parseErrorsSaveDir directory where to save error files
     */
    public void setParseErrorsSaveDir(File parseErrorsSaveDir) {
        this.parseErrorsSaveDir = parseErrorsSaveDir;
    }

    /**
     * Sets the pre-parse handlers. The array is copied, so later changes to
     * the supplied array do not affect this configuration.
     * @param handlers pre-parse handlers (may be <code>null</code>)
     */
    public void setPreParseHandlers(IImporterHandler... handlers) {
        preParseHandlers = ArrayUtils.clone(handlers);
    }

    /**
     * Gets a copy of the pre-parse handlers.
     * @return pre-parse handlers, or <code>null</code> if none were set
     */
    public IImporterHandler[] getPreParseHandlers() {
        return ArrayUtils.clone(preParseHandlers);
    }

    /**
     * Sets the post-parse handlers. The array is copied, so later changes to
     * the supplied array do not affect this configuration.
     * @param handlers post-parse handlers (may be <code>null</code>)
     */
    public void setPostParseHandlers(IImporterHandler... handlers) {
        postParseHandlers = ArrayUtils.clone(handlers);
    }

    /**
     * Gets a copy of the post-parse handlers.
     * @return post-parse handlers, or <code>null</code> if none were set
     */
    public IImporterHandler[] getPostParseHandlers() {
        return ArrayUtils.clone(postParseHandlers);
    }

    /**
     * Gets a copy of the response processors.
     * @return response processors, or <code>null</code> if none were set
     */
    public IImporterResponseProcessor[] getResponseProcessors() {
        return ArrayUtils.clone(responseProcessors);
    }

    /**
     * Sets the response processors. The array is copied, so later changes to
     * the supplied array do not affect this configuration.
     * @param responseProcessors response processors (may be <code>null</code>)
     */
    public void setResponseProcessors(IImporterResponseProcessor... responseProcessors) {
        this.responseProcessors = ArrayUtils.clone(responseProcessors);
    }

    /**
     * Gets the maximum file cache size.
     * Default is {@link #DEFAULT_MAX_FILE_CACHE_SIZE}.
     * @return maximum file cache size
     */
    public int getMaxFileCacheSize() {
        return maxFileCacheSize;
    }

    /**
     * Sets the maximum file cache size.
     * @param maxFileCacheSize maximum file cache size
     */
    public void setMaxFileCacheSize(int maxFileCacheSize) {
        this.maxFileCacheSize = maxFileCacheSize;
    }

    /**
     * Gets the maximum file pool cache size.
     * Default is {@link #DEFAULT_MAX_FILE_POOL_CACHE_SIZE}.
     * @return maximum file pool cache size
     */
    public int getMaxFilePoolCacheSize() {
        return maxFilePoolCacheSize;
    }

    /**
     * Sets the maximum file pool cache size.
     * @param maxFilePoolCacheSize maximum file pool cache size
     */
    public void setMaxFilePoolCacheSize(int maxFilePoolCacheSize) {
        this.maxFilePoolCacheSize = maxFilePoolCacheSize;
    }

    /**
     * Loads this configuration from XML. Settings absent from the XML are
     * reset to their defaults (the parser factory keeps its current value).
     * Does nothing when the reader is <code>null</code>.
     * @param in reader over the XML configuration
     * @throws IOException declared by the interface
     * @throws ConfigurationException if the configuration cannot be loaded
     */
    @Override
    public void loadFromXML(Reader in) throws IOException {
        if (in == null) {
            return;
        }
        XMLConfiguration xml = ConfigurationUtil.newXMLConfiguration(in);
        try {
            //--- Temp directory -----------------------------------------------
            setTempDir(new File(xml.getString("tempDir", ImporterConfig.DEFAULT_TEMP_DIR_PATH)));

            //--- Parse errors save dir ----------------------------------------
            String saveDir = xml.getString("parseErrorsSaveDir", null);
            setParseErrorsSaveDir(saveDir == null ? null : new File(saveDir));

            //--- File Mem Cache Size ------------------------------------------
            setMaxFileCacheSize(xml.getInt("maxFileCacheSize", ImporterConfig.DEFAULT_MAX_FILE_CACHE_SIZE));
            //--- File Pool Mem Cache Size -------------------------------------
            setMaxFilePoolCacheSize(
                    xml.getInt("maxFilePoolCacheSize", ImporterConfig.DEFAULT_MAX_FILE_POOL_CACHE_SIZE));

            //--- Pre-Import Handlers ------------------------------------------
            setPreParseHandlers(loadImportHandlers(xml, "preParseHandlers"));

            //--- Document Parser Factory --------------------------------------
            setParserFactory(ConfigurationUtil.newInstance(xml, "documentParserFactory", getParserFactory()));

            //--- Post-Import Handlers -----------------------------------------
            setPostParseHandlers(loadImportHandlers(xml, "postParseHandlers"));

            //--- Response Processors ------------------------------------------
            setResponseProcessors(loadResponseProcessors(xml, "responseProcessors.responseProcessor"));
        } catch (ConfigurationException e) {
            // Already the right type: propagate untouched.
            throw e;
        } catch (Exception e) {
            throw new ConfigurationException("Could not load configuration from XMLConfiguration instance.", e);
        }
    }

    /**
     * Instantiates one handler per child element found under the given path.
     * @param xml XML configuration
     * @param xmlPath path of the element holding the handler elements
     * @return loaded handlers (never <code>null</code>)
     */
    private IImporterHandler[] loadImportHandlers(XMLConfiguration xml, String xmlPath) {
        List<IImporterHandler> handlers = new ArrayList<>();

        // The XPath engine is only needed for the "/*" wildcard lookup.
        // Restore the original engine even if the lookup fails so the shared
        // configuration object is not left in an altered state.
        ExpressionEngine originalEngine = xml.getExpressionEngine();
        List<HierarchicalConfiguration> xmlHandlers;
        xml.setExpressionEngine(new XPathExpressionEngine());
        try {
            xmlHandlers = xml.configurationsAt(xmlPath + "/*");
        } finally {
            xml.setExpressionEngine(originalEngine);
        }

        for (HierarchicalConfiguration xmlHandler : xmlHandlers) {
            xmlHandler.setExpressionEngine(originalEngine);
            IImporterHandler handler = (IImporterHandler) ConfigurationUtil.newInstance(xmlHandler);
            if (handler != null) {
                handlers.add(handler);
            } else {
                //TODO throw exception here?
                LOG.warn("Could not load an importer handler under \"" + xmlPath
                        + "\", skipping it. Please check for other log messages.");
            }
        }
        return handlers.toArray(new IImporterHandler[0]);
    }

    /**
     * Instantiates one response processor per element found at the given path.
     * @param xml XML configuration
     * @param xmlPath path of the response processor elements
     * @return loaded response processors (never <code>null</code>)
     */
    private IImporterResponseProcessor[] loadResponseProcessors(XMLConfiguration xml, String xmlPath) {
        List<IImporterResponseProcessor> processors = new ArrayList<>();

        List<HierarchicalConfiguration> procNodes = xml.configurationsAt(xmlPath);
        for (HierarchicalConfiguration procNode : procNodes) {
            IImporterResponseProcessor proc = ConfigurationUtil.newInstance(procNode);
            if (proc != null) {
                processors.add(proc);
                LOG.info("Reponse processor loaded: " + proc);
            } else {
                LOG.error("Problem loading reponse processors, please check for other log messages.");
            }
        }
        return processors.toArray(new IImporterResponseProcessor[0]);
    }

    /**
     * Saves this configuration as an <code>importer</code> XML element.
     * @param out writer receiving the XML
     * @throws IOException if the configuration cannot be written
     */
    @Override
    public void saveToXML(Writer out) throws IOException {
        try {
            EnhancedXMLStreamWriter writer = new EnhancedXMLStreamWriter(out);
            writer.writeStartElement("importer");
            // Null-safe, like parseErrorsSaveDir: a null temp dir must not
            // fail the save with a NullPointerException.
            writer.writeElementString("tempDir", Objects.toString(getTempDir(), null));
            writer.writeElementString("parseErrorsSaveDir", Objects.toString(getParseErrorsSaveDir(), null));
            writer.writeElementInteger("maxFileCacheSize", getMaxFileCacheSize());
            writer.writeElementInteger("maxFilePoolCacheSize", getMaxFilePoolCacheSize());
            // Nested objects write straight to "out", so everything buffered
            // by the stream writer must reach "out" first to keep the order.
            writer.flush();

            writeObjects(out, "preParseHandlers", HANDLER_TAG, getPreParseHandlers());
            writeObject(out, "documentParserFactory", getParserFactory());
            writeObjects(out, "postParseHandlers", HANDLER_TAG, getPostParseHandlers());
            writeObjects(out, "responseProcessors", RESPONSE_PROCESSOR_TAG, getResponseProcessors());

            writer.writeEndElement();
            // Without this flush the closing tag may stay in the stream
            // writer's buffer and never reach "out".
            writer.flush();
        } catch (XMLStreamException e) {
            throw new IOException("Could not save importer config.", e);
        }
    }

    /**
     * Writes a list of objects wrapped in a parent element. Writes nothing
     * when the array is <code>null</code> or empty.
     * @param out writer receiving the XML
     * @param listTagName name of the wrapping element
     * @param itemTagName element name used for items that are not
     *        {@link IXMLConfigurable}
     * @param items objects to write
     * @throws IOException if writing fails
     */
    private void writeObjects(Writer out, String listTagName, String itemTagName, Object[] items)
            throws IOException {
        if (ArrayUtils.isEmpty(items)) {
            return;
        }
        out.write("<" + listTagName + ">");
        for (Object item : items) {
            writeObject(out, itemTagName, item);
        }
        out.write("</" + listTagName + ">");
        out.flush();
    }

    /**
     * Writes a single object without the <code>ignore</code> attribute.
     * @param out writer receiving the XML
     * @param tagName element name used when the object cannot write itself
     * @param object object to write (nothing is written when <code>null</code>)
     * @throws IOException if writing fails
     */
    private void writeObject(Writer out, String tagName, Object object) throws IOException {
        writeObject(out, tagName, object, false);
    }

    /**
     * Writes a single object. An {@link IXMLConfigurable} object writes
     * itself; any other object is written as an empty element carrying its
     * class name.
     * @param out writer receiving the XML
     * @param tagName element name used when the object cannot write itself
     * @param object object to write
     * @param ignore whether to add an <code>ignore="true"</code> attribute
     * @throws IOException if writing fails
     */
    private void writeObject(Writer out, String tagName, Object object, boolean ignore) throws IOException {
        if (object == null) {
            if (ignore) {
                out.write("<" + tagName + " ignore=\"" + ignore + "\" />");
            }
            return;
        }
        StringWriter w = new StringWriter();
        if (object instanceof IXMLConfigurable) {
            ((IXMLConfigurable) object).saveToXML(w);
        } else {
            // getName() (not getCanonicalName()) yields the binary name that
            // reflective class loading expects, including for nested classes.
            w.write("<" + tagName + " class=\"" + object.getClass().getName() + "\" />");
        }
        String xml = w.toString();
        if (ignore) {
            xml = xml.replace("<" + tagName + " class=\"", "<" + tagName + " ignore=\"true\" class=\"");
        }
        out.write(xml);
        out.flush();
    }
}