nl.minbzk.dwr.zoeken.enricher.settings.EnricherSettings.java Source code

Java tutorial

Introduction

Here is the source code for nl.minbzk.dwr.zoeken.enricher.settings.EnricherSettings.java

Source

/* Copyright (c) 2010 Ministry of the Interior and Kingdom Relations,
 * the Netherlands. All rights reserved.
 * 
 * This file is part of the MinBZK Search Enricher indexing generator.
 * 
 * Search Enricher is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * Search Enricher is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Search Enricher. If not, see <http://www.gnu.org/licenses/>. */

package nl.minbzk.dwr.zoeken.enricher.settings;

import ch.qos.logback.classic.Level;
import ch.qos.logback.classic.Logger;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import org.slf4j.LoggerFactory;
import org.springframework.util.StringUtils;

/**
 * Enricher settings.
 * <p>
 * Settings are read from the <code>enricher.properties</code> classpath resource and may be overridden by
 * an additional set of properties (typically loaded from an external settings file). Command-line
 * arguments can subsequently be applied through {@link #processArguments(String[])}.
 * 
 * @author Jasper van Veghel <j.veghel@rijksoverheid.nl>
 */
public class EnricherSettings {
    /**
     * The logger.
     */
    private static final Logger logger = (Logger) LoggerFactory.getLogger(EnricherSettings.class);

    /**
     * Constants.
     */
    private static final String KEY_PROCESSOR_TIKA_DETECTORS = "enricher.processor.tika.detectors";
    private static final String KEY_PROCESSOR_TIKA_PARSERS = "enricher.processor.tika.parsers";

    private static final String KEY_ENVELOPE_MATCH_DB = "enricher.envelope.match.db";
    private static final String KEY_ENVELOPE_MATCH_ENCODING = "enricher.envelope.match.encoding";
    private static final String KEY_ENVELOPE_WORD_BREAK_MIN = "enricher.envelope.word.break.min";
    private static final String KEY_ENVELOPE_WORD_BREAK_MAX = "enricher.envelope.word.break.max";
    private static final String KEY_ENVELOPE_JOB = "enricher.envelope.job";
    private static final String KEY_ENVELOPE_JOB_GENERATOR_TYPE = "generator.type";
    private static final String KEY_ENVELOPE_JOB_GENERATOR_MAPPING = "generator.mapping";
    private static final String KEY_ENVELOPE_BOILERPIPE_EXTRACTOR = "boilerpipe.extractor";
    private static final String KEY_ENVELOPE_BOILERPIPE_EXCLUSION_FIELD = "boilerpipe.exclusion.field";
    private static final String KEY_ENVELOPE_BOILERPIPE_EXCLUSION_VALUES = "boilerpipe.exclusion.values";
    private static final String KEY_ENVELOPE_JOB_DB_NAME = "db.name";
    private static final String KEY_ENVELOPE_JOB_DB_TYPE = "db.type";
    private static final String KEY_ENVELOPE_JOB_DB_NAME_COMPOSITION = "db.name.composition";
    private static final String KEY_ENVELOPE_JOB_DB_NAME_PREREQUISITES = "db.name.prerequisites";
    private static final String KEY_ENVELOPE_JOB_RESULT = "result";
    private static final String KEY_ENVELOPE_JOB_STRIP_TAGS = "strip.tags";

    private static final String KEY_ENVELOPE_JOB_DATE_CONVERT = "date.convert";
    private static final String KEY_ENVELOPE_JOB_FORMAT_FROM = "from";
    private static final String KEY_ENVELOPE_JOB_FORMAT_TO = "to";
    private static final String KEY_ENVELOPE_JOB_FORMAT_LOCALE = "locale";

    private static final String KEY_ENVELOPE_JOB_FORMAT_DETECTION_PARAMETER = "format.detection.parameter";
    private static final String KEY_ENVELOPE_JOB_FORMAT_DETECTION_DEFAULT = "format.detection.parameter.default";
    private static final String KEY_ENVELOPE_JOB_LANGUAGE_DETECTION_PARAMETER = "language.detection.parameter";
    private static final String KEY_ENVELOPE_JOB_LANGUAGE_DETECTION_SUPPORTED = "language.detection.supported";
    private static final String KEY_ENVELOPE_JOB_LANGUAGE_DETECTION_DEFAULT = "language.detection.default";

    private static final String KEY_ENVELOPE_JOB_ENTITY_DETECTION_LANGUAGES = "entity.detection.languages";
    private static final String KEY_ENVELOPE_JOB_ENTITY_DETECTION_DESCRIPTORS = "entity.detection.descriptors";
    private static final String KEY_ENVELOPE_JOB_ENTITY_DETECTION_SCAN_TYPES = "entity.detection.scan.types";
    private static final String KEY_ENVELOPE_JOB_ENTITY_DETECTION_FIELD_PREFIX = "entity.detection.field.prefix";

    private static final String KEY_ENVELOPE_JOB_GEO_SPATIAL_FIELD_PREFIX = "geo.spatial.field.prefix";

    private static final String KEY_ENVELOPE_JOB_TIKA_RESOURCE_KEY_PRIORITY = "tika.resource.key.priority";

    private static final String KEY_GENERATOR_ACI_ADD_URI = "enricher.generator.aci.add.uri";
    private static final String KEY_GENERATOR_ACI_COMMIT_URI = "enricher.generator.aci.commit.uri";
    private static final String KEY_GENERATOR_ACI_DELETE_REFERENCE_URI = "enricher.generator.aci.delete.reference.uri";
    private static final String KEY_GENERATOR_ACI_DELETE_DOCID_URI = "enricher.generator.aci.delete.docid.uri";
    private static final String KEY_GENERATOR_ACI_USER_AGENT = "enricher.generator.aci.user.agent";

    private static final String KEY_GENERATOR_SOLR_URI = "enricher.generator.solr.uri";
    private static final String KEY_GENERATOR_SOLR_CLOUD_URI = "enricher.generator.solr.cloud.uri";
    private static final String KEY_GENERATOR_SOLR_UNIQUE_KEY_COMPOSITION = "enricher.generator.solr.unique.key.composition";

    private static final String KEY_GENERATOR_ELASTIC_SEARCH_URI = "enricher.generator.elastic.search.uri";
    private static final String KEY_GENERATOR_ELASTIC_SEARCH_CLUSTER_NAME = "enricher.generator.elastic.search.cluster.name";
    private static final String KEY_GENERATOR_ELASTIC_SEARCH_UNIQUE_KEY_COMPOSITION = "enricher.generator.elastic.search.unique.key.composition";

    private static final String KEY_GENERATOR_DOCUMENTS_PER_UPLOAD = "enricher.generator.documents.per.upload";

    private static final String KEY_LANGUAGE_ANALYSIS_THRESHOLD = "enricher.language.analysis.consideration.threshold";
    private static final String KEY_LANGUAGE_ANALYSIS_MAXIMUM_INSTANCES = "enricher.language.analysis.maximum.instances.per.unit";
    private static final String KEY_LANGUAGE_ANALYSIS_WAITING_TIMEOUT = "enricher.language.analysis.instance.waiting.timeout";

    private static final String KEY_LANGUAGE_DETECTION_PROFILES = "enricher.envelope.language.detection.profiles";

    private static final String KEY_ATTENDER_MEMORY_INDEX_CONFIGURATION = "enricher.attender.memory.index.configuration";

    /**
     * Name of the fallback job that {@link #getJob(String)} resorts to when no job matches.
     */
    private static final String JOB_UNKNOWN = "null";

    /**
     * System property holding the root log level to apply on construction.
     */
    private static final String PROPERTY_LOGLEVEL = "enricher.debug.level";

    /**
     * Name of the classpath resource holding the built-in settings.
     */
    private static final String INTERNAL_PROPERTIES_RESOURCE = "enricher.properties";

    /**
     * Generator enum.
     */
    public enum GeneratorType {
        ACI, Solr, SolrCloud, ElasticSearch
    }

    /**
     * Input file (or null).
     */
    private String inputFile = null;

    /**
     * Output encoding (default to UTF-8).
     */
    private String encoding = "UTF-8";

    /**
     * The database field to match against.
     */
    private final String databaseMatch;

    /**
     * The encoding field to match against.
     */
    private final String encodingMatch;

    /**
     * Custom Tika detectors.
     */
    private final List<String> tikaDetectors = new ArrayList<String>();

    /**
     * Custom Tika parsers.
     */
    private final List<String> tikaParsers = new ArrayList<String>();

    /**
     * The result upload add URI.
     */
    private final String aciAddUri;

    /**
     * The result upload commit URI.
     */
    private final String aciCommitUri;

    /**
     * The result upload delete reference URI.
     */
    private final String aciDeleteReferenceUri;

    /**
     * The result upload delete DocId URI.
     */
    private final String aciDeleteDocIdUri;

    /**
     * The result upload agent.
     */
    private final String aciUserAgent;

    /**
     * The Solr URI.
     */
    private final String solrUri;

    /**
     * The SolrCloud URI.
     */
    private final String solrCloudUri;

    /**
     * The Solr unique key composition.
     */
    private String solrUniqueKeyComposition;

    /**
     * The ElasticSearch URI.
     */
    private final String elasticSearchUri;

    /**
     * The ElasticSearch cluster name.
     */
    private final String elasticSearchClusterName;

    /**
     * The ElasticSearch unique key composition.
     */
    private String elasticSearchUniqueKeyComposition;

    /**
     * The job override.
     */
    private String jobOverride;

    /**
     * The maximum number of documents per upload.
     */
    private Integer documentsPerUpload = 1;

    /**
     * The language analysis threshold, or 0 if the complete should be considered.
     */
    private Integer languageAnalysisThreshold = 0;

    /**
     * The language analysis instances allowed to execute simultaneously per unit.
     */
    private Integer languageAnalysisMaximumInstances = 1;

    /**
     * The language analysis instances waiting period (for an instance to become available.)
     */
    private Integer languageAnalysisWaitingTimeout = 0;

    /**
     * The language analysis profiles.
     */
    private String languageDetectionProfiles = null;

    /**
     * The attender memory index configuration.
     */
    private String attenderMemoryIndexConfiguration = null;

    /**
     * The minimum word break count.
     */
    private Integer wordBreakMin = 0;

    /**
     * The maximum word break count.
     */
    private Integer wordBreakMax = 0;

    /**
     * The jobs, keyed by job name.
     */
    private final Map<String, EnricherJob> jobs = new HashMap<String, EnricherJob>();

    /**
     * The actual properties.
     */
    private final Properties properties = new Properties();

    /**
     * Construct the settings from the internal properties, overridden by the given settings file.
     * 
     * @param settingsFile path of the external settings file, or null to use the internal settings only
     * @throws Exception if the settings file cannot be read or a setting is missing or invalid
     */
    public EnricherSettings(final String settingsFile) throws Exception {
        this(extractProperties(settingsFile));
    }

    /**
     * Construct the settings from the internal properties, overridden by the given extra properties.
     * 
     * @param extraProperties properties that are added on top of the internal ones, or null for none
     * @throws Exception if a required setting is missing or a numeric setting is not a valid integer
     */
    public EnricherSettings(final Properties extraProperties) throws Exception {
        final String logLevel = System.getProperty(PROPERTY_LOGLEVEL);

        if (StringUtils.hasText(logLevel))
            ((Logger) LoggerFactory.getLogger(Logger.ROOT_LOGGER_NAME)).setLevel(Level.toLevel(logLevel));

        loadProperties(extraProperties);

        databaseMatch = requireProperty(KEY_ENVELOPE_MATCH_DB);
        encodingMatch = requireProperty(KEY_ENVELOPE_MATCH_ENCODING);

        tikaDetectors.addAll(retrieveList(KEY_PROCESSOR_TIKA_DETECTORS));
        tikaParsers.addAll(retrieveList(KEY_PROCESSOR_TIKA_PARSERS));

        aciAddUri = requireProperty(KEY_GENERATOR_ACI_ADD_URI);
        aciCommitUri = requireProperty(KEY_GENERATOR_ACI_COMMIT_URI);
        aciDeleteReferenceUri = requireProperty(KEY_GENERATOR_ACI_DELETE_REFERENCE_URI);
        aciDeleteDocIdUri = requireProperty(KEY_GENERATOR_ACI_DELETE_DOCID_URI);
        aciUserAgent = requireProperty(KEY_GENERATOR_ACI_USER_AGENT);

        solrUri = requireProperty(KEY_GENERATOR_SOLR_URI);
        solrCloudUri = requireProperty(KEY_GENERATOR_SOLR_CLOUD_URI);
        solrUniqueKeyComposition = requireProperty(KEY_GENERATOR_SOLR_UNIQUE_KEY_COMPOSITION);

        elasticSearchUri = requireProperty(KEY_GENERATOR_ELASTIC_SEARCH_URI);
        elasticSearchClusterName = requireProperty(KEY_GENERATOR_ELASTIC_SEARCH_CLUSTER_NAME);
        elasticSearchUniqueKeyComposition = requireProperty(KEY_GENERATOR_ELASTIC_SEARCH_UNIQUE_KEY_COMPOSITION);

        documentsPerUpload = retrieveInteger(KEY_GENERATOR_DOCUMENTS_PER_UPLOAD, documentsPerUpload);

        languageAnalysisThreshold = retrieveInteger(KEY_LANGUAGE_ANALYSIS_THRESHOLD, languageAnalysisThreshold);
        languageAnalysisMaximumInstances = retrieveInteger(KEY_LANGUAGE_ANALYSIS_MAXIMUM_INSTANCES,
                languageAnalysisMaximumInstances);
        languageAnalysisWaitingTimeout = retrieveInteger(KEY_LANGUAGE_ANALYSIS_WAITING_TIMEOUT,
                languageAnalysisWaitingTimeout);

        // Optional values: these stay null when not configured

        languageDetectionProfiles = properties.getProperty(KEY_LANGUAGE_DETECTION_PROFILES);
        attenderMemoryIndexConfiguration = properties.getProperty(KEY_ATTENDER_MEMORY_INDEX_CONFIGURATION);

        // The word break bounds only apply as a pair; a lone minimum or maximum is ignored

        if (properties.getProperty(KEY_ENVELOPE_WORD_BREAK_MIN) != null
                && properties.getProperty(KEY_ENVELOPE_WORD_BREAK_MAX) != null) {
            wordBreakMin = retrieveInteger(KEY_ENVELOPE_WORD_BREAK_MIN, wordBreakMin);
            wordBreakMax = retrieveInteger(KEY_ENVELOPE_WORD_BREAK_MAX, wordBreakMax);
        }

        registerJobs();
    }

    /**
     * Load the internal properties from the classpath and add the given extra properties on top of them.
     * <p>
     * A failure to read the internal properties (including the resource being absent) is logged rather
     * than thrown; in that case the extra properties are not applied either.
     * 
     * @param extraProperties the properties to add, or null for none
     */
    private void loadProperties(final Properties extraProperties) {
        try {
            InputStream internalStream = getClass().getClassLoader()
                    .getResourceAsStream(INTERNAL_PROPERTIES_RESOURCE);

            // getResourceAsStream() signals a missing resource by returning null rather than throwing

            if (internalStream == null)
                throw new IOException(
                        "The resource " + INTERNAL_PROPERTIES_RESOURCE + " could not be found on the classpath");

            try {
                properties.load(internalStream);
            } finally {
                internalStream.close();
            }

            // Load the external properties as an addition to the internal ones

            if (extraProperties != null)
                for (Entry<Object, Object> extraProperty : extraProperties.entrySet())
                    properties.setProperty(extraProperty.getKey().toString(), extraProperty.getValue().toString());
        } catch (IOException e) {
            logger.error("Could not initialize settings from the given properties files", e);
        }
    }

    /**
     * Retrieve the trimmed value of a property that must be present.
     * 
     * @param key the property key
     * @return String the trimmed property value
     * @throws IllegalStateException if the property has not been configured
     */
    private String requireProperty(final String key) {
        String value = properties.getProperty(key);

        if (value == null)
            throw new IllegalStateException("The required property '" + key + "' has not been configured");

        return value.trim();
    }

    /**
     * Retrieve an optional integer property.
     * 
     * @param key the property key
     * @param defaultValue the value to return when the property has not been configured
     * @return Integer the parsed value, or the default
     * @throws NumberFormatException if the configured value is not a valid integer
     */
    private Integer retrieveInteger(final String key, final Integer defaultValue) {
        String value = properties.getProperty(key);

        if (value == null)
            return defaultValue;

        try {
            return Integer.valueOf(value.trim());
        } catch (NumberFormatException e) {
            // Same exception type as before, but now stating which property is at fault

            NumberFormatException failure = new NumberFormatException(
                    "The property '" + key + "' should be an integer but is '" + value + "'");

            failure.initCause(e);

            throw failure;
        }
    }

    /**
     * Retrieve an optional comma-separated property as a list of trimmed values; blank entries are skipped.
     * 
     * @param key the property key
     * @return List<String> the values, or an empty list when the property has not been configured
     */
    private List<String> retrieveList(final String key) {
        List<String> values = new ArrayList<String>();

        String value = properties.getProperty(key);

        if (value != null)
            for (String element : value.split(",")) {
                String trimmed = element.trim();

                if (trimmed.length() > 0)
                    values.add(trimmed);
            }

        return values;
    }

    /**
     * Register a job for every distinct job name found in the
     * <code>enricher.envelope.job.&lt;job&gt;.&lt;property&gt;</code> keys.
     */
    private void registerJobs() {
        final String jobPrefix = KEY_ENVELOPE_JOB + ".";

        for (Entry<Object, Object> property : properties.entrySet()) {
            String name = (String) property.getKey();

            if (!name.startsWith(jobPrefix))
                continue;

            int jobNameEnd = name.indexOf('.', jobPrefix.length());

            // Without a property part after the job name there is nothing to derive a job from

            if (jobNameEnd < 0) {
                logger.warn("Ignoring malformed job property '{}'", name);

                continue;
            }

            String jobName = name.substring(jobPrefix.length(), jobNameEnd);

            if (!jobs.containsKey(jobName))
                jobs.put(jobName, createJob(jobName));
        }
    }

    /**
     * Create the job with the given name from its job-specific properties.
     * 
     * @param jobName the job name
     * @return EnricherJob the populated job
     */
    private EnricherJob createJob(final String jobName) {
        EnricherJob job = new EnricherJob();

        job.setName(jobName);
        job.setProcessedPath(retrieveJobProperty(jobName, KEY_ENVELOPE_JOB_RESULT));

        job.setDatabaseName(retrieveJobProperty(jobName, KEY_ENVELOPE_JOB_DB_NAME));
        job.setDatabaseNamePrerequisitesExpression(
                retrieveJobProperty(jobName, KEY_ENVELOPE_JOB_DB_NAME_PREREQUISITES));
        job.setDatabaseNameComposition(retrieveJobProperty(jobName, KEY_ENVELOPE_JOB_DB_NAME_COMPOSITION));

        job.setDatabaseType(retrieveJobProperty(jobName, KEY_ENVELOPE_JOB_DB_TYPE));

        job.setGeneratorTypes(StringUtils.tokenizeToStringArray(
                retrieveJobProperty(jobName, KEY_ENVELOPE_JOB_GENERATOR_TYPE), ",", true, true));
        job.setGeneratorMapping(retrieveJobProperty(jobName, KEY_ENVELOPE_JOB_GENERATOR_MAPPING));

        job.setBoilerpipeExtractor(retrieveJobProperty(jobName, KEY_ENVELOPE_BOILERPIPE_EXTRACTOR));
        job.setBoilerpipeExclusionField(retrieveJobProperty(jobName, KEY_ENVELOPE_BOILERPIPE_EXCLUSION_FIELD));
        job.setBoilerpipeExclusionValues(retrieveJobProperty(jobName, KEY_ENVELOPE_BOILERPIPE_EXCLUSION_VALUES));

        String[] stripTags = StringUtils.tokenizeToStringArray(
                retrieveJobProperty(jobName, KEY_ENVELOPE_JOB_STRIP_TAGS), ",", true, true);

        job.setStripTags(stripTags != null ? Arrays.asList(stripTags) : new ArrayList<String>());

        job.setFormatDetectionParameter(
                retrieveJobProperty(jobName, KEY_ENVELOPE_JOB_FORMAT_DETECTION_PARAMETER));
        job.setFormatDetectionDefault(retrieveJobProperty(jobName, KEY_ENVELOPE_JOB_FORMAT_DETECTION_DEFAULT));

        job.setLanguageDetectionParameter(
                retrieveJobProperty(jobName, KEY_ENVELOPE_JOB_LANGUAGE_DETECTION_PARAMETER));
        job.setLanguageDetectionSupported(
                retrieveJobProperty(jobName, KEY_ENVELOPE_JOB_LANGUAGE_DETECTION_SUPPORTED));
        job.setLanguageDetectionDefault(
                retrieveJobProperty(jobName, KEY_ENVELOPE_JOB_LANGUAGE_DETECTION_DEFAULT));

        job.setEntityDetectionLanguages(
                retrieveJobProperty(jobName, KEY_ENVELOPE_JOB_ENTITY_DETECTION_LANGUAGES));
        job.setEntityDetectionDescriptors(
                retrieveJobProperty(jobName, KEY_ENVELOPE_JOB_ENTITY_DETECTION_DESCRIPTORS));
        job.setEntityDetectionScanTypes(
                retrieveJobProperty(jobName, KEY_ENVELOPE_JOB_ENTITY_DETECTION_SCAN_TYPES));
        job.setEntityDetectionFieldPrefix(
                retrieveJobProperty(jobName, KEY_ENVELOPE_JOB_ENTITY_DETECTION_FIELD_PREFIX));

        job.setGeoSpatialFieldPrefix(retrieveJobProperty(jobName, KEY_ENVELOPE_JOB_GEO_SPATIAL_FIELD_PREFIX));

        job.setTikaResourceKeyPriority(
                retrieveJobProperty(jobName, KEY_ENVELOPE_JOB_TIKA_RESOURCE_KEY_PRIORITY));

        job.setFieldConverters(deriveFieldConverters(jobName));

        return job;
    }

    /**
     * Derive the date field converters of a job from its
     * <code>date.convert.&lt;field&gt;.&lt;property&gt;</code> keys.
     * 
     * @param jobName the job name
     * @return Map<String, EnricherFieldConverter> the converters, keyed by field name
     */
    private Map<String, EnricherFieldConverter> deriveFieldConverters(final String jobName) {
        Map<String, EnricherFieldConverter> converters = new HashMap<String, EnricherFieldConverter>();

        final String fieldPrefix = KEY_ENVELOPE_JOB + "." + jobName + "." + KEY_ENVELOPE_JOB_DATE_CONVERT + ".";

        for (Entry<Object, Object> converterProperty : properties.entrySet()) {
            String converterPropertyName = (String) converterProperty.getKey();

            if (!converterPropertyName.startsWith(fieldPrefix))
                continue;

            int fieldNameEnd = converterPropertyName.indexOf('.', fieldPrefix.length());

            // A key lacking the property part (from / to / locale) cannot describe a converter

            if (fieldNameEnd < 0) {
                logger.warn("Ignoring malformed date conversion property '{}'", converterPropertyName);

                continue;
            }

            String fieldName = converterPropertyName.substring(fieldPrefix.length(), fieldNameEnd);

            if (converters.containsKey(fieldName))
                continue;

            String dateFormatFrom = retrieveDatabaseProperty(jobName, fieldName, KEY_ENVELOPE_JOB_FORMAT_FROM);
            String dateFormatTo = retrieveDatabaseProperty(jobName, fieldName, KEY_ENVELOPE_JOB_FORMAT_TO);
            String dateFormatLocale = retrieveDatabaseProperty(jobName, fieldName,
                    KEY_ENVELOPE_JOB_FORMAT_LOCALE);

            if (dateFormatLocale == null) {
                dateFormatLocale = Locale.getDefault().getLanguage();

                logger.warn("Date format field {} will use default locale {}", fieldName, dateFormatLocale);
            } else
                // Locale.ROOT keeps the result independent of the JVM's default locale
                dateFormatLocale = dateFormatLocale.toLowerCase(Locale.ROOT);

            converters.put(fieldName,
                    new EnricherFieldConverter(fieldName, dateFormatFrom, dateFormatTo, dateFormatLocale));
        }

        return converters;
    }

    /**
     * Retrieve a property for a specific job.
     * 
     * @param jobName the job name
     * @param propertyName the property name, relative to the job
     * @return String the property value, or null if it has not been configured
     */
    private String retrieveJobProperty(final String jobName, final String propertyName) {
        return properties.getProperty(KEY_ENVELOPE_JOB + "." + jobName + "." + propertyName);
    }

    /**
     * Retrieve a date conversion property for a specific field of a specific job.
     * 
     * @param jobName the job name
     * @param fieldName the field name
     * @param propertyName the property name, relative to the field
     * @return String the property value, or null if it has not been configured
     */
    private String retrieveDatabaseProperty(final String jobName, final String fieldName,
            final String propertyName) {
        String databasePrefix = KEY_ENVELOPE_JOB + "." + jobName + "." + KEY_ENVELOPE_JOB_DATE_CONVERT;

        return properties.getProperty(databasePrefix + "." + fieldName + "." + propertyName);
    }

    /**
     * Extract the properties from the given file.
     * 
     * @param settingsFile path of the settings file, or null to obtain empty properties
     * @return Properties the loaded properties
     * @throws IOException if the file cannot be opened or read
     */
    private static Properties extractProperties(final String settingsFile) throws IOException {
        Properties extraProperties = new Properties();

        if (settingsFile == null)
            return extraProperties;

        try {
            InputStream externalStream = new FileInputStream(settingsFile);

            // Close the stream even when loading fails half-way

            try {
                extraProperties.load(externalStream);
            } finally {
                externalStream.close();
            }
        } catch (IOException e) {
            logger.error("Could not initialize settings from the given properties files", e);

            throw e;
        }

        return extraProperties;
    }

    /**
     * Process the given commandline arguments.
     * <p>
     * Any argument that is not a recognized option is taken to be the input file; when several are given
     * the last one wins.
     * 
     * @param arguments the commandline arguments; null is treated as no arguments
     * @return boolean false if usage information was printed (help requested or no input file given),
     *         true otherwise
     */
    public boolean processArguments(final String[] arguments) {
        if (arguments != null)
            for (String argument : arguments)
                if (argument.equals("-?") || argument.equals("--help")) {
                    usage();

                    return false;
                } else if (argument.equals("-v") || argument.equals("--verbose"))
                    ((Logger) LoggerFactory.getLogger(Logger.ROOT_LOGGER_NAME)).setLevel(Level.DEBUG);
                else if (argument.startsWith("-e"))
                    encoding = argument.substring("-e".length());
                else if (argument.startsWith("--encoding="))
                    encoding = argument.substring("--encoding=".length());
                else if (argument.startsWith("-j"))
                    jobOverride = argument.substring("-j".length());
                else if (argument.startsWith("--job="))
                    jobOverride = argument.substring("--job=".length());
                else
                    inputFile = argument;

        if (inputFile == null) {
            usage();

            return false;
        }

        return true;
    }

    /**
     * Print the processor usage information to the standard error stream.
     */
    private static void usage() {
        PrintStream out = System.err;

        out.println("usage: content-fetch [option] [file]");
        out.println();
        out.println("Options:");
        out.println("   -?  or --help      Print this usage message");
        out.println("   -v  or --verbose    Print debug level messages");
        out.println("   -jX or --job=X      Override with job X");
        out.println("   -eX or --encoding=X  Use output encoding X");
        out.println();
        out.println("Description:");
        out.println("   The file(s) specified on the command line will be");
        out.println("   parsed and will output the extracted text content");
        out.println("   and metadata to the configured output folder.");
        out.println();
        out.println("   Instead of a file name you can also specify the URL");
        out.println("   of a document to be parsed.");
    }

    /**
     * Get the inputFile.
     * 
     * @return String the input file, or null if none has been given
     */
    public String getInputFile() {
        return inputFile;
    }

    /**
     * Get the encoding.
     * 
     * @return String
     */
    public String getEncoding() {
        return encoding;
    }

    /**
     * Get the documentsPerUpload.
     * 
     * @return Integer
     */
    public Integer getDocumentsPerUpload() {
        return documentsPerUpload;
    }

    /**
     * Get the wordBreakMin.
     * 
     * @return Integer
     */
    public Integer getWordBreakMin() {
        return wordBreakMin;
    }

    /**
     * Get the wordBreakMax.
     * 
     * @return Integer
     */
    public Integer getWordBreakMax() {
        return wordBreakMax;
    }

    /**
     * Get the jobOverride.
     * 
     * @return String the job override, or null if none has been given
     */
    public String getJobOverride() {
        return jobOverride;
    }

    /**
     * Get the databaseMatch.
     * 
     * @return String
     */
    public String getDatabaseMatch() {
        return databaseMatch;
    }

    /**
     * Get the encodingMatch.
     * 
     * @return String
     */
    public String getEncodingMatch() {
        return encodingMatch;
    }

    /**
     * Get the tikaDetectors.
     * 
     * @return List<String> the internal (modifiable) list
     */
    public List<String> getTikaDetectors() {
        return tikaDetectors;
    }

    /**
     * Get the tikaParsers.
     * 
     * @return List<String> the internal (modifiable) list
     */
    public List<String> getTikaParsers() {
        return tikaParsers;
    }

    /**
     * Get the languageAnalysisThreshold.
     * 
     * @return Integer
     */
    public Integer getLanguageAnalysisThreshold() {
        return languageAnalysisThreshold;
    }

    /**
     * Get the languageAnalysisMaximumInstances.
     * 
     * @return Integer
     */
    public Integer getLanguageAnalysisMaximumInstances() {
        return languageAnalysisMaximumInstances;
    }

    /**
     * Get the languageAnalysisWaitingTimeout.
     * 
     * @return Integer
     */
    public Integer getLanguageAnalysisWaitingTimeout() {
        return languageAnalysisWaitingTimeout;
    }

    /**
     * Get the languageDetectionProfiles.
     * 
     * @return String the profiles, or null if not configured
     */
    public String getLanguageDetectionProfiles() {
        return languageDetectionProfiles;
    }

    /**
     * Get the attenderMemoryIndexConfiguration.
     * 
     * @return String the configuration, or null if not configured
     */
    public String getAttenderMemoryIndexConfiguration() {
        return attenderMemoryIndexConfiguration;
    }

    /**
     * Set the languageDetectionProfiles (typically used for testing without a settings-file.)
     * 
     * @param languageDetectionProfiles
     */
    public void setLanguageDetectionProfiles(final String languageDetectionProfiles) {
        this.languageDetectionProfiles = languageDetectionProfiles;
    }

    /**
     * Get all jobs.
     * 
     * @return Map<String, EnricherJob> the internal (modifiable) map of jobs, keyed by job name
     */
    public Map<String, EnricherJob> getJobs() {
        return jobs;
    }

    /**
     * Get a specific job, falling back to the job named JOB_UNKNOWN when there is no match.
     * 
     * @param name the job name (may be null)
     * @return EnricherJob the matching job, or the fallback job
     * @throws IllegalArgumentException if neither the given job nor the fallback job has been configured
     */
    public EnricherJob getJob(final String name) {
        if (name != null && jobs.containsKey(name))
            return jobs.get(name);
        else if (jobs.containsKey(JOB_UNKNOWN)) {
            logger.warn(
                    "The given job '" + name + "' has no matching configuration - resorting to generic NULL-job");

            return jobs.get(JOB_UNKNOWN);
        } else
            throw new IllegalArgumentException(String.format(
                    "The given job '%s' has no matching configuration and resorting to a generic NULL-job failed",
                    name));
    }

    /**
     * Get the aciAddUri.
     * 
     * @return String
     */
    public String getAciAddUri() {
        return aciAddUri;
    }

    /**
     * Get the aciCommitUri.
     * 
     * @return String
     */
    public String getAciCommitUri() {
        return aciCommitUri;
    }

    /**
     * Get the aciDeleteReferenceUri.
     * 
     * @return String
     */
    public String getAciDeleteReferenceUri() {
        return aciDeleteReferenceUri;
    }

    /**
     * Get the aciDeleteDocIdUri.
     * 
     * @return String
     */
    public String getAciDeleteDocIdUri() {
        return aciDeleteDocIdUri;
    }

    /**
     * Get the aciUserAgent.
     * 
     * @return String
     */
    public String getAciUserAgent() {
        return aciUserAgent;
    }

    /**
     * Get the solrUri.
     * 
     * @return String
     */
    public String getSolrUri() {
        return solrUri;
    }

    /**
     * Get the solrCloudUri.
     * 
     * @return String
     */
    public String getSolrCloudUri() {
        return solrCloudUri;
    }

    /**
     * Get the solrUniqueKeyComposition.
     * 
     * @return String
     */
    public String getSolrUniqueKeyComposition() {
        return solrUniqueKeyComposition;
    }

    /**
     * Set the solrUniqueKeyComposition.
     *
     * @param solrUniqueKeyComposition
     */
    public void setSolrUniqueKeyComposition(final String solrUniqueKeyComposition) {
        this.solrUniqueKeyComposition = solrUniqueKeyComposition;
    }

    /**
     * Get the elasticSearchUri.
     *
     * @return String
     */
    public String getElasticSearchUri() {
        return elasticSearchUri;
    }

    /**
     * Get the elasticSearchClusterName.
     *
     * @return String
     */
    public String getElasticSearchClusterName() {
        return elasticSearchClusterName;
    }

    /**
     * Get the elasticSearchUniqueKeyComposition.
     *
     * @return String
     */
    public String getElasticSearchUniqueKeyComposition() {
        return elasticSearchUniqueKeyComposition;
    }

    /**
     * Set the elasticSearchUniqueKeyComposition.
     *
     * @param elasticSearchUniqueKeyComposition
     */
    public void setElasticSearchUniqueKeyComposition(final String elasticSearchUniqueKeyComposition) {
        this.elasticSearchUniqueKeyComposition = elasticSearchUniqueKeyComposition;
    }

    /**
     * Get the properties.
     * 
     * @return Properties the internal (modifiable) merged properties
     */
    public Properties getProperties() {
        return properties;
    }
}