org.apache.solr.handler.dataimport.DataImporter.java Source code

Java tutorial

Introduction

Below is the full source code for org.apache.solr.handler.dataimport.DataImporter.java.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import org.apache.solr.common.SolrException;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.util.SystemIdResolver;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.XMLErrorLogger;
import org.apache.solr.handler.dataimport.config.ConfigNameConstants;
import org.apache.solr.handler.dataimport.config.ConfigParseUtil;
import org.apache.solr.handler.dataimport.config.DIHConfiguration;
import org.apache.solr.handler.dataimport.config.Entity;
import org.apache.solr.handler.dataimport.config.PropertyWriter;
import org.apache.solr.handler.dataimport.config.Script;

import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
import static org.apache.solr.handler.dataimport.DocBuilder.loadClass;
import static org.apache.solr.handler.dataimport.config.ConfigNameConstants.CLASS;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.apache.commons.io.IOUtils;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import java.io.IOException;
import java.io.StringReader;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantLock;

/**
 * <p> Stores all configuration information for pulling and indexing data. </p>
 * <p>
 * <b>This API is experimental and subject to change</b>
 *
 * @since solr 1.3
 */
/**
 * <p> Stores all configuration information for pulling and indexing data. </p>
 * <p>
 * Acts as the central coordinator for the DataImportHandler: it loads and parses the
 * DIH XML configuration, resolves data sources, and drives full/delta imports via a
 * {@link DocBuilder}. A single {@link ReentrantLock} serializes configuration reloads
 * and import runs.
 * <p>
 * <b>This API is experimental and subject to change</b>
 *
 * @since solr 1.3
 */
public class DataImporter {

    /** Lifecycle states reported to clients via the status command. */
    public enum Status {
        IDLE, RUNNING_FULL_DUMP, RUNNING_DELTA_DUMP, JOB_FAILED
    }

    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
    private static final XMLErrorLogger XMLLOG = new XMLErrorLogger(LOG);

    private Status status = Status.IDLE;
    // Parsed representation of the DIH XML config; replaced atomically on reload.
    private DIHConfiguration config;
    private Date indexStartTime;
    // General-purpose key/value store shared with DocBuilder (e.g. status messages).
    private Properties store = new Properties();
    // Data-source properties supplied per-request (handler defaults); keyed by
    // data-source name, with a null key acting as the default data source.
    private Map<String, Map<String, String>> requestLevelDataSourceProps = new HashMap<>();
    private IndexSchema schema;
    public DocBuilder docBuilder;
    public DocBuilder.Statistics cumulativeStatistics = new DocBuilder.Statistics();
    private SolrCore core;
    // Survives individual import runs; used by entity processors to cache state
    // for the lifetime of the core.
    private Map<String, Object> coreScopeSession = new ConcurrentHashMap<>();
    // Guards both configuration reloads and import execution; tryLock semantics
    // make concurrent import requests fail fast instead of queueing.
    private ReentrantLock importLock = new ReentrantLock();
    // True once any entity in the loaded config declares a deltaQuery.
    private boolean isDeltaImportSupported = false;
    private final String handlerName;

    /**
     * Only for testing purposes
     */
    DataImporter() {
        this.handlerName = "dataimport";
    }

    DataImporter(SolrCore core, String handlerName) {
        this.handlerName = handlerName;
        this.core = core;
        this.schema = core.getLatestSchema();
    }

    /**
     * Re-reads the DIH configuration (from inline text or a resource file) and the
     * request-level data-source defaults, if no import is currently running.
     *
     * @param params        the current request's parameters (may carry inline config)
     * @param defaultParams handler default parameters; {@code <lst name="datasource">}
     *                      entries become request-level data-source properties
     * @return true if a configuration was (re)loaded or datasource defaults were found;
     *         false if nothing was loaded or another import holds the lock
     * @throws IOException if the configuration resource cannot be read
     */
    boolean maybeReloadConfiguration(RequestInfo params, NamedList<?> defaultParams) throws IOException {
        if (importLock.tryLock()) {
            boolean success = false;
            try {
                if (null != params.getRequest()) {
                    if (schema != params.getRequest().getSchema()) {
                        schema = params.getRequest().getSchema();
                    }
                }
                String dataConfigText = params.getDataConfig();
                String dataconfigFile = params.getConfigFile();
                InputSource is = null;
                if (dataConfigText != null && dataConfigText.length() > 0) {
                    is = new InputSource(new StringReader(dataConfigText));
                } else if (dataconfigFile != null) {
                    is = new InputSource(core.getResourceLoader().openResource(dataconfigFile));
                    // A SystemId enables relative resolution (e.g. XInclude) against the resource loader.
                    is.setSystemId(SystemIdResolver.createSystemIdFromResourceName(dataconfigFile));
                    LOG.info("Loading DIH Configuration: {}", dataconfigFile);
                }
                if (is != null) {
                    config = loadDataConfig(is);
                    success = true;
                }

                Map<String, Map<String, String>> dsProps = new HashMap<>();
                if (defaultParams != null) {
                    int position = 0;
                    while (position < defaultParams.size()) {
                        if (defaultParams.getName(position) == null) {
                            break;
                        }
                        String name = defaultParams.getName(position);
                        if (name.equals("datasource")) {
                            success = true;
                            NamedList<?> dsConfig = (NamedList<?>) defaultParams.getVal(position);
                            LOG.info("Getting configuration for Global Datasource...");
                            Map<String, String> props = new HashMap<>();
                            for (int i = 0; i < dsConfig.size(); i++) {
                                props.put(dsConfig.getName(i), dsConfig.getVal(i).toString());
                            }
                            LOG.info("Adding properties to datasource: {}", props);
                            dsProps.put((String) dsConfig.get("name"), props);
                        }
                        position++;
                    }
                }
                requestLevelDataSourceProps = Collections.unmodifiableMap(dsProps);
            } finally {
                importLock.unlock();
            }
            return success;
        } else {
            return false;
        }
    }

    public String getHandlerName() {
        return handlerName;
    }

    public IndexSchema getSchema() {
        return schema;
    }

    /**
     * Used by tests
     */
    public void loadAndInit(String configStr) {
        config = loadDataConfig(new InputSource(new StringReader(configStr)));
    }

    public void loadAndInit(InputSource configFile) {
        config = loadDataConfig(configFile);
    }

    /**
     * Parses a DIH XML configuration from the given source and records whether any
     * entity supports delta imports (declares a {@code deltaQuery} attribute).
     *
     * @param configFile the XML configuration source
     * @return the parsed configuration
     * @throws DataImportHandlerException (SEVERE) wrapping any parse/read failure
     */
    public DIHConfiguration loadDataConfig(InputSource configFile) {

        DIHConfiguration dihcfg = null;
        try {
            // NOTE(review): DTD/external-entity processing is not explicitly disabled here;
            // entity resolution is routed through SystemIdResolver when a core is present,
            // but config from untrusted sources could still be an XXE vector — confirm
            // against current Solr hardening guidelines.
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();

            // only enable xinclude, if a SolrCore and SystemId is present (makes no sense otherwise)
            if (core != null && configFile.getSystemId() != null) {
                try {
                    dbf.setXIncludeAware(true);
                    dbf.setNamespaceAware(true);
                } catch (UnsupportedOperationException e) {
                    LOG.warn("XML parser doesn't support XInclude option");
                }
            }

            DocumentBuilder builder = dbf.newDocumentBuilder();
            if (core != null)
                builder.setEntityResolver(new SystemIdResolver(core.getResourceLoader()));
            builder.setErrorHandler(XMLLOG);
            Document document;
            try {
                document = builder.parse(configFile);
            } finally {
                // some XML parsers are broken and don't close the byte stream (but they should according to spec)
                IOUtils.closeQuietly(configFile.getByteStream());
            }

            dihcfg = readFromXml(document);
            LOG.info("Data Configuration loaded successfully");
        } catch (Exception e) {
            throw new DataImportHandlerException(SEVERE, "Data Config problem: " + e.getMessage(), e);
        }
        for (Entity e : dihcfg.getEntities()) {
            if (e.getAllAttributes().containsKey(SqlEntityProcessor.DELTA_QUERY)) {
                isDeltaImportSupported = true;
                break;
            }
        }
        return dihcfg;
    }

    /**
     * Builds a {@link DIHConfiguration} from a parsed DOM document: validates the
     * {@code <dataConfig>}/{@code <document>} structure and collects scripts, functions,
     * data sources, and the (single, optional) property writer.
     *
     * @param xmlDocument the parsed configuration document
     * @return the assembled configuration
     * @throws DataImportHandlerException (SEVERE) on any structural problem
     */
    public DIHConfiguration readFromXml(Document xmlDocument) {
        DIHConfiguration config;
        List<Map<String, String>> functions = new ArrayList<>();
        Script script = null;
        Map<String, Map<String, String>> dataSources = new HashMap<>();

        NodeList dataConfigTags = xmlDocument.getElementsByTagName("dataConfig");
        if (dataConfigTags == null || dataConfigTags.getLength() == 0) {
            throw new DataImportHandlerException(SEVERE, "the root node '<dataConfig>' is missing");
        }
        Element e = (Element) dataConfigTags.item(0);
        List<Element> documentTags = ConfigParseUtil.getChildNodes(e, "document");
        if (documentTags.isEmpty()) {
            throw new DataImportHandlerException(SEVERE,
                    "DataImportHandler " + "configuration file must have one <document> node.");
        }

        List<Element> scriptTags = ConfigParseUtil.getChildNodes(e, ConfigNameConstants.SCRIPT);
        if (!scriptTags.isEmpty()) {
            script = new Script(scriptTags.get(0));
        }

        // Add the provided evaluators
        List<Element> functionTags = ConfigParseUtil.getChildNodes(e, ConfigNameConstants.FUNCTION);
        if (!functionTags.isEmpty()) {
            for (Element element : functionTags) {
                String func = ConfigParseUtil.getStringAttribute(element, NAME, null);
                String clz = ConfigParseUtil.getStringAttribute(element, ConfigNameConstants.CLASS, null);
                if (func == null || clz == null) {
                    throw new DataImportHandlerException(SEVERE,
                            "<function> must have a 'name' and 'class' attributes");
                } else {
                    functions.add(ConfigParseUtil.getAllAttributes(element));
                }
            }
        }
        List<Element> dataSourceTags = ConfigParseUtil.getChildNodes(e, ConfigNameConstants.DATA_SRC);
        if (!dataSourceTags.isEmpty()) {
            for (Element element : dataSourceTags) {
                Map<String, String> p = new HashMap<>();
                HashMap<String, String> attrs = ConfigParseUtil.getAllAttributes(element);
                for (Map.Entry<String, String> entry : attrs.entrySet()) {
                    p.put(entry.getKey(), entry.getValue());
                }
                dataSources.put(p.get("name"), p);
            }
        }
        // If no unnamed ("default") data source exists, promote an arbitrary one so
        // lookups with a null name still resolve.
        if (dataSources.get(null) == null) {
            for (Map<String, String> properties : dataSources.values()) {
                dataSources.put(null, properties);
                break;
            }
        }
        PropertyWriter pw = null;
        List<Element> propertyWriterTags = ConfigParseUtil.getChildNodes(e, ConfigNameConstants.PROPERTY_WRITER);
        if (propertyWriterTags.isEmpty()) {
            // Default writer: ZK-backed when running in SolrCloud, file-backed otherwise.
            boolean zookeeper = false;
            if (this.core != null && this.core.getCoreDescriptor().getCoreContainer().isZooKeeperAware()) {
                zookeeper = true;
            }
            pw = new PropertyWriter(zookeeper ? "ZKPropertiesWriter" : "SimplePropertiesWriter",
                    Collections.<String, String>emptyMap());
        } else if (propertyWriterTags.size() > 1) {
            throw new DataImportHandlerException(SEVERE,
                    "Only one " + ConfigNameConstants.PROPERTY_WRITER + " can be configured.");
        } else {
            Element pwElement = propertyWriterTags.get(0);
            String type = null;
            Map<String, String> params = new HashMap<>();
            for (Map.Entry<String, String> entry : ConfigParseUtil.getAllAttributes(pwElement).entrySet()) {
                if (TYPE.equals(entry.getKey())) {
                    type = entry.getValue();
                } else {
                    params.put(entry.getKey(), entry.getValue());
                }
            }
            if (type == null) {
                throw new DataImportHandlerException(SEVERE,
                        "The " + ConfigNameConstants.PROPERTY_WRITER + " element must specify " + TYPE);
            }
            pw = new PropertyWriter(type, params);
        }
        return new DIHConfiguration(documentTags.get(0), this, functions, script, dataSources, pw);
    }

    /**
     * Instantiates and initializes the configured {@link DIHProperties} writer.
     *
     * @throws DataImportHandlerException (SEVERE) if the class cannot be loaded,
     *         instantiated, or initialized
     */
    @SuppressWarnings("unchecked")
    private DIHProperties createPropertyWriter() {
        DIHProperties propWriter = null;
        PropertyWriter configPw = config.getPropertyWriter();
        try {
            Class<DIHProperties> writerClass = DocBuilder.loadClass(configPw.getType(), this.core);
            propWriter = writerClass.newInstance();
            propWriter.init(this, configPw.getParameters());
        } catch (Exception e) {
            throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
                    "Unable to load PropertyWriter implementation: " + configPw.getType(), e);
        }
        return propWriter;
    }

    public DIHConfiguration getConfig() {
        return config;
    }

    Date getIndexStartTime() {
        return indexStartTime;
    }

    void setIndexStartTime(Date indextStartTime) {
        this.indexStartTime = indextStartTime;
    }

    void store(Object key, Object value) {
        store.put(key, value);
    }

    Object retrieve(Object key) {
        return store.get(key);
    }

    /**
     * Creates and initializes a {@link DataSource} for the given entity, resolving
     * properties in priority order: request-level by name, config by name,
     * request-level default (null name), config default. When the request carries
     * "rows" (plus optional "start"), a "maxRows" cap is injected.
     *
     * @param key  the entity the data source is for (used for error reporting)
     * @param name the configured data-source name; may be null for the default
     * @param ctx  current context, supplying request parameters
     * @return an initialized data source (JdbcDataSource when no type is configured)
     * @throws DataImportHandlerException (SEVERE) if none can be resolved or init fails
     */
    public DataSource getDataSourceInstance(Entity key, String name, Context ctx) {
        Map<String, String> p = requestLevelDataSourceProps.get(name);
        if (p == null)
            p = config.getDataSources().get(name);
        if (p == null)
            p = requestLevelDataSourceProps.get(null);// for default data source
        if (p == null)
            p = config.getDataSources().get(null);
        if (p == null)
            throw new DataImportHandlerException(SEVERE,
                    "No dataSource :" + name + " available for entity :" + key.getName());
        String type = p.get(TYPE);
        DataSource dataSrc = null;
        if (type == null) {
            dataSrc = new JdbcDataSource();
        } else {
            try {
                dataSrc = (DataSource) DocBuilder.loadClass(type, getCore()).newInstance();
            } catch (Exception e) {
                wrapAndThrow(SEVERE, e, "Invalid type for data source: " + type);
            }
        }
        try {
            Properties copyProps = new Properties();
            copyProps.putAll(p);
            Map<String, Object> map = ctx.getRequestParameters();
            if (map.containsKey("rows")) {
                int rows = Integer.parseInt((String) map.get("rows"));
                if (map.containsKey("start")) {
                    // Pagination: fetch enough rows to cover the offset plus the page size.
                    rows += Integer.parseInt((String) map.get("start"));
                }
                copyProps.setProperty("maxRows", String.valueOf(rows));
            }
            dataSrc.init(ctx, copyProps);
        } catch (Exception e) {
            wrapAndThrow(SEVERE, e, "Failed to initialize DataSource: " + key.getDataSourceName());
        }
        return dataSrc;
    }

    public Status getStatus() {
        return status;
    }

    public void setStatus(Status status) {
        this.status = status;
    }

    /** @return true if an import (or config reload) currently holds the lock */
    public boolean isBusy() {
        return importLock.isLocked();
    }

    /**
     * Runs a full import synchronously, updating cumulative statistics on success
     * (unless in debug mode). Errors are logged and reported through the DocBuilder.
     */
    public void doFullImport(DIHWriter writer, RequestInfo requestParams) {
        LOG.info("Starting Full Import");
        setStatus(Status.RUNNING_FULL_DUMP);
        try {
            DIHProperties dihPropWriter = createPropertyWriter();
            setIndexStartTime(dihPropWriter.getCurrentTimestamp());
            docBuilder = new DocBuilder(this, writer, dihPropWriter, requestParams);
            checkWritablePersistFile(writer, dihPropWriter);
            docBuilder.execute();
            if (!requestParams.isDebug())
                cumulativeStatistics.add(docBuilder.importStatistics);
        } catch (Exception e) {
            SolrException.log(LOG, "Full Import failed", e);
            // docBuilder is null when createPropertyWriter()/DocBuilder construction
            // failed on the first run; guard so the NPE doesn't mask the real cause.
            if (docBuilder != null) {
                docBuilder.handleError("Full Import failed", e);
            }
        } finally {
            setStatus(Status.IDLE);
            DocBuilder.INSTANCE.set(null);
        }

    }

    /**
     * Fails fast when the config supports delta imports but the properties writer
     * (which persists last-index timestamps) is not writable.
     */
    private void checkWritablePersistFile(DIHWriter writer, DIHProperties dihPropWriter) {
        if (isDeltaImportSupported && !dihPropWriter.isWritable()) {
            throw new DataImportHandlerException(SEVERE,
                    "Properties is not writable. Delta imports are supported by data config but will not work.");
        }
    }

    /**
     * Runs a delta import synchronously; mirrors {@link #doFullImport}.
     */
    public void doDeltaImport(DIHWriter writer, RequestInfo requestParams) {
        LOG.info("Starting Delta Import");
        setStatus(Status.RUNNING_DELTA_DUMP);
        try {
            DIHProperties dihPropWriter = createPropertyWriter();
            setIndexStartTime(dihPropWriter.getCurrentTimestamp());
            docBuilder = new DocBuilder(this, writer, dihPropWriter, requestParams);
            checkWritablePersistFile(writer, dihPropWriter);
            docBuilder.execute();
            if (!requestParams.isDebug())
                cumulativeStatistics.add(docBuilder.importStatistics);
        } catch (Exception e) {
            LOG.error("Delta Import Failed", e);
            // Same guard as doFullImport: docBuilder may be null if setup failed.
            if (docBuilder != null) {
                docBuilder.handleError("Delta Import Failed", e);
            }
        } finally {
            setStatus(Status.IDLE);
            DocBuilder.INSTANCE.set(null);
        }

    }

    /** Executes the requested command on a new background thread. */
    public void runAsync(final RequestInfo reqParams, final DIHWriter sw) {
        new Thread(() -> runCmd(reqParams, sw)).start();
    }

    /**
     * Dispatches an import command. Abort bypasses the lock; import commands are
     * rejected (with a warning) when another import is already running.
     */
    void runCmd(RequestInfo reqParams, DIHWriter sw) {
        String command = reqParams.getCommand();
        if (command.equals(ABORT_CMD)) {
            if (docBuilder != null) {
                docBuilder.abort();
            }
            return;
        }
        if (!importLock.tryLock()) {
            LOG.warn("Import command failed . another import is running");
            return;
        }
        try {
            if (FULL_IMPORT_CMD.equals(command) || IMPORT_CMD.equals(command)) {
                doFullImport(sw, reqParams);
            } else if (command.equals(DELTA_IMPORT_CMD)) {
                doDeltaImport(sw, reqParams);
            }
        } finally {
            importLock.unlock();
        }
    }

    /**
     * Snapshots the current status messages into an ordered map, eagerly calling
     * toString() on values because some are computed lazily.
     */
    @SuppressWarnings("unchecked")
    Map<String, String> getStatusMessages() {
        //this map object is a Collections.synchronizedMap(new LinkedHashMap()). if we
        // synchronize on the object it must be safe to iterate through the map
        Map statusMessages = (Map) retrieve(STATUS_MSGS);
        Map<String, String> result = new LinkedHashMap<>();
        if (statusMessages != null) {
            synchronized (statusMessages) {
                for (Object o : statusMessages.entrySet()) {
                    Map.Entry e = (Map.Entry) o;
                    //the toString is taken because some of the Objects create the data lazily when toString() is called
                    result.put((String) e.getKey(), e.getValue().toString());
                }
            }
        }
        return result;

    }

    public DocBuilder getDocBuilder() {
        return docBuilder;
    }

    public DocBuilder getDocBuilder(DIHWriter writer, RequestInfo requestParams) {
        DIHProperties dihPropWriter = createPropertyWriter();
        return new DocBuilder(this, writer, dihPropWriter, requestParams);
    }

    Map<String, Evaluator> getEvaluators() {
        return getEvaluators(config.getFunctions());
    }

    /**
     * used by tests.
     */
    Map<String, Evaluator> getEvaluators(List<Map<String, String>> fn) {
        Map<String, Evaluator> evaluators = new HashMap<>();
        // Built-in evaluators, always available.
        evaluators.put(Evaluator.DATE_FORMAT_EVALUATOR, new DateFormatEvaluator());
        evaluators.put(Evaluator.SQL_ESCAPE_EVALUATOR, new SqlEscapingEvaluator());
        evaluators.put(Evaluator.URL_ENCODE_EVALUATOR, new UrlEvaluator());
        evaluators.put(Evaluator.ESCAPE_SOLR_QUERY_CHARS, new SolrQueryEscapingEvaluator());
        SolrCore core = docBuilder == null ? null : docBuilder.dataImporter.getCore();
        // User-defined <function> evaluators from the configuration.
        for (Map<String, String> map : fn) {
            try {
                evaluators.put(map.get(NAME), (Evaluator) loadClass(map.get(CLASS), core).newInstance());
            } catch (Exception e) {
                wrapAndThrow(SEVERE, e, "Unable to instantiate evaluator: " + map.get(CLASS));
            }
        }
        return evaluators;
    }

    // Per-thread counter of data-source queries issued during an import run.
    static final ThreadLocal<AtomicLong> QUERY_COUNT = new ThreadLocal<AtomicLong>() {
        @Override
        protected AtomicLong initialValue() {
            return new AtomicLong();
        }
    };

    /** User-facing status/error message constants. */
    static final class MSG {
        public static final String NO_CONFIG_FOUND = "Configuration not found";

        public static final String NO_INIT = "DataImportHandler started. Not Initialized. No commands can be run";

        public static final String INVALID_CONFIG = "FATAL: Could not create importer. DataImporter config invalid";

        public static final String LOAD_EXP = "Exception while loading DataImporter";

        public static final String JMX_DESC = "Manage data import from databases to Solr";

        public static final String CMD_RUNNING = "A command is still running...";

        public static final String DEBUG_NOT_ENABLED = "Debug not enabled. Add a tag <str name=\"enableDebug\">true</str> in solrconfig.xml";

        public static final String CONFIG_RELOADED = "Configuration Re-loaded successfully";

        public static final String CONFIG_NOT_RELOADED = "Configuration NOT Re-loaded...Data Importer is busy.";

        public static final String TOTAL_DOC_PROCESSED = "Total Documents Processed";

        public static final String TOTAL_FAILED_DOCS = "Total Documents Failed";

        public static final String TOTAL_QUERIES_EXECUTED = "Total Requests made to DataSource";

        public static final String TOTAL_ROWS_EXECUTED = "Total Rows Fetched";

        public static final String TOTAL_DOCS_DELETED = "Total Documents Deleted";

        public static final String TOTAL_DOCS_SKIPPED = "Total Documents Skipped";
    }

    public SolrCore getCore() {
        return core;
    }

    void putToCoreScopeSession(String key, Object val) {
        coreScopeSession.put(key, val);
    }

    Object getFromCoreScopeSession(String key) {
        return coreScopeSession.get(key);
    }

    // Attribute and command-name constants used throughout the DIH configuration.
    public static final String COLUMN = "column";

    public static final String TYPE = "type";

    public static final String DATA_SRC = "dataSource";

    public static final String MULTI_VALUED = "multiValued";

    public static final String NAME = "name";

    public static final String STATUS_MSGS = "status-messages";

    public static final String FULL_IMPORT_CMD = "full-import";

    public static final String IMPORT_CMD = "import";

    public static final String DELTA_IMPORT_CMD = "delta-import";

    public static final String ABORT_CMD = "abort";

    public static final String DEBUG_MODE = "debug";

    public static final String RELOAD_CONF_CMD = "reload-config";

    public static final String SHOW_CONF_CMD = "show-config";

}