org.codelibs.fess.es.config.exentity.DataConfig.java Source code

Java tutorial

Introduction

Here is the source code for org.codelibs.fess.es.config.exentity.DataConfig.java

Source

/*
 * Copyright 2012-2019 CodeLibs Project and the Others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 */
package org.codelibs.fess.es.config.exentity;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.apache.http.auth.AuthScheme;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.Credentials;
import org.apache.http.auth.NTCredentials;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.impl.auth.BasicScheme;
import org.apache.http.impl.auth.DigestScheme;
import org.apache.http.impl.auth.NTLMScheme;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.fess.Constants;
import org.codelibs.fess.crawler.client.CrawlerClientFactory;
import org.codelibs.fess.crawler.client.ftp.FtpAuthentication;
import org.codelibs.fess.crawler.client.ftp.FtpClient;
import org.codelibs.fess.crawler.client.http.Authentication;
import org.codelibs.fess.crawler.client.http.HcHttpClient;
import org.codelibs.fess.crawler.client.http.form.FormScheme;
import org.codelibs.fess.crawler.client.http.impl.AuthenticationImpl;
import org.codelibs.fess.crawler.client.http.ntlm.JcifsEngine;
import org.codelibs.fess.crawler.client.smb.SmbAuthentication;
import org.codelibs.fess.crawler.client.smb.SmbClient;
import org.codelibs.fess.crawler.exception.CrawlerSystemException;
import org.codelibs.fess.es.config.bsentity.BsDataConfig;
import org.codelibs.fess.util.ParameterUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * @author FreeGen
 */
public class DataConfig extends BsDataConfig implements CrawlingConfig {

    private static final long serialVersionUID = 1L;

    private static final Logger logger = LoggerFactory.getLogger(DataConfig.class);

    private static final String CRAWLER_WEB_PREFIX = "crawler.web.";

    private static final String CRAWLER_WEB_HEADER_PREFIX = CRAWLER_WEB_PREFIX + "header.";

    private static final String CRAWLER_WEB_AUTH = CRAWLER_WEB_PREFIX + "auth";

    private static final String CRAWLER_USERAGENT = "crawler.useragent";

    private static final String CRAWLER_PARAM_PREFIX = "crawler.param.";

    private static final Object CRAWLER_FILE_AUTH = "crawler.file.auth";

    protected Pattern[] includedDocPathPatterns;

    protected Pattern[] excludedDocPathPatterns;

    private Map<String, String> handlerParameterMap;

    private Map<String, String> handlerScriptMap;

    public DataConfig() {
        super();
        setBoost(1.0f);
    }

    @Override
    public String getDocumentBoost() {
        return Float.valueOf(getBoost().floatValue()).toString();
    }

    public String getBoostValue() {
        if (boost != null) {
            return boost.toString();
        }
        return null;
    }

    public void setBoostValue(final String value) {
        if (value != null) {
            try {
                boost = Float.parseFloat(value);
            } catch (final Exception e) {
            }
        }
    }

    @Override
    public String getIndexingTarget(final String input) {
        // always return true
        return Constants.TRUE;
    }

    @Override
    public String getConfigId() {
        return ConfigType.DATA.getConfigId(getId());
    }

    public Map<String, String> getHandlerParameterMap() {
        if (handlerParameterMap == null) {
            handlerParameterMap = ParameterUtil.parse(getHandlerParameter());
        }
        return handlerParameterMap;
    }

    public Map<String, String> getHandlerScriptMap() {
        if (handlerScriptMap == null) {
            handlerScriptMap = ParameterUtil.parse(getHandlerScript());
        }
        return handlerScriptMap;
    }

    @Override
    public Map<String, Object> initializeClientFactory(final CrawlerClientFactory crawlerClientFactory) {
        final Map<String, String> paramMap = getHandlerParameterMap();

        final Map<String, Object> factoryParamMap = new HashMap<>();
        crawlerClientFactory.setInitParameterMap(factoryParamMap);

        // parameters
        for (final Map.Entry<String, String> entry : paramMap.entrySet()) {
            final String key = entry.getKey();
            if (key.startsWith(CRAWLER_PARAM_PREFIX)) {
                factoryParamMap.put(key.substring(CRAWLER_PARAM_PREFIX.length()), entry.getValue());
            }
        }

        // user agent
        final String userAgent = paramMap.get(CRAWLER_USERAGENT);
        if (StringUtil.isNotBlank(userAgent)) {
            factoryParamMap.put(HcHttpClient.USER_AGENT_PROPERTY, userAgent);
        }

        // web auth
        final String webAuthStr = paramMap.get(CRAWLER_WEB_AUTH);
        if (StringUtil.isNotBlank(webAuthStr)) {
            final String[] webAuthNames = webAuthStr.split(",");
            final List<Authentication> basicAuthList = new ArrayList<>();
            for (final String webAuthName : webAuthNames) {
                final String scheme = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".scheme");

                final AuthScheme authScheme = getAuthScheme(paramMap, webAuthName, scheme);
                final AuthScope authScope = getAuthScope(webAuthName, scheme, paramMap);
                final Credentials credentials = getCredentials(webAuthName, scheme, paramMap);
                basicAuthList.add(new AuthenticationImpl(authScope, credentials, authScheme));
            }
            factoryParamMap.put(HcHttpClient.BASIC_AUTHENTICATIONS_PROPERTY,
                    basicAuthList.toArray(new Authentication[basicAuthList.size()]));
        }

        // request header
        final List<org.codelibs.fess.crawler.client.http.RequestHeader> rhList = new ArrayList<>();
        int count = 1;
        String headerName = paramMap.get(CRAWLER_WEB_HEADER_PREFIX + count + ".name");
        while (StringUtil.isNotBlank(headerName)) {
            final String headerValue = paramMap.get(CRAWLER_WEB_HEADER_PREFIX + count + ".value");
            rhList.add(new org.codelibs.fess.crawler.client.http.RequestHeader(headerName, headerValue));
            count++;
            headerName = paramMap.get(CRAWLER_WEB_HEADER_PREFIX + count + ".name");
        }
        if (!rhList.isEmpty()) {
            factoryParamMap.put(HcHttpClient.REQUERT_HEADERS_PROPERTY,
                    rhList.toArray(new org.codelibs.fess.crawler.client.http.RequestHeader[rhList.size()]));
        }

        // proxy credentials
        final String proxyHost = paramMap.get(CRAWLER_WEB_PREFIX + "proxyHost");
        final String proxyPort = paramMap.get(CRAWLER_WEB_PREFIX + "proxyPort");
        if (StringUtil.isNotBlank(proxyHost) && StringUtil.isNotBlank(proxyPort)) {
            factoryParamMap.put(HcHttpClient.PROXY_HOST_PROPERTY, proxyHost);
            factoryParamMap.put(HcHttpClient.PROXY_PORT_PROPERTY, proxyPort);
            final String proxyUsername = paramMap.get(CRAWLER_WEB_PREFIX + "proxyUsername");
            final String proxyPassword = paramMap.get(CRAWLER_WEB_PREFIX + "proxyPassword");
            if (proxyUsername != null && proxyPassword != null) {
                factoryParamMap.put(HcHttpClient.PROXY_CREDENTIALS_PROPERTY,
                        new UsernamePasswordCredentials(proxyUsername, proxyPassword));
            }
        } else {
            initializeDefaultHttpProxy(factoryParamMap);
        }

        // file auth
        final String fileAuthStr = paramMap.get(CRAWLER_FILE_AUTH);
        if (StringUtil.isNotBlank(fileAuthStr)) {
            final String[] fileAuthNames = fileAuthStr.split(",");
            final List<SmbAuthentication> smbAuthList = new ArrayList<>();
            final List<org.codelibs.fess.crawler.client.smb1.SmbAuthentication> smb1AuthList = new ArrayList<>();
            final List<FtpAuthentication> ftpAuthList = new ArrayList<>();
            for (final String fileAuthName : fileAuthNames) {
                final String scheme = paramMap.get(CRAWLER_FILE_AUTH + "." + fileAuthName + ".scheme");
                if (Constants.SAMBA.equals(scheme)) {
                    final String domain = paramMap.get(CRAWLER_FILE_AUTH + "." + fileAuthName + ".domain");
                    final String hostname = paramMap.get(CRAWLER_FILE_AUTH + "." + fileAuthName + ".host");
                    final String port = paramMap.get(CRAWLER_FILE_AUTH + "." + fileAuthName + ".port");
                    final String username = paramMap.get(CRAWLER_FILE_AUTH + "." + fileAuthName + ".username");
                    final String password = paramMap.get(CRAWLER_FILE_AUTH + "." + fileAuthName + ".password");

                    if (StringUtil.isEmpty(username)) {
                        logger.warn("username is empty. fileAuth:" + fileAuthName);
                        continue;
                    }

                    final SmbAuthentication smbAuth = new SmbAuthentication();
                    smbAuth.setDomain(domain == null ? StringUtil.EMPTY : domain);
                    smbAuth.setServer(hostname);
                    if (StringUtil.isNotBlank(port)) {
                        try {
                            smbAuth.setPort(Integer.parseInt(port));
                        } catch (final NumberFormatException e) {
                            logger.warn("Failed to parse " + port, e);
                        }
                    }
                    smbAuth.setUsername(username);
                    smbAuth.setPassword(password == null ? StringUtil.EMPTY : password);
                    smbAuthList.add(smbAuth);

                    final org.codelibs.fess.crawler.client.smb1.SmbAuthentication smb1Auth = new org.codelibs.fess.crawler.client.smb1.SmbAuthentication();
                    smb1Auth.setDomain(domain == null ? StringUtil.EMPTY : domain);
                    smb1Auth.setServer(hostname);
                    if (StringUtil.isNotBlank(port)) {
                        try {
                            smb1Auth.setPort(Integer.parseInt(port));
                        } catch (final NumberFormatException e) {
                            logger.warn("Failed to parse " + port, e);
                        }
                    }
                    smb1Auth.setUsername(username);
                    smb1Auth.setPassword(password == null ? StringUtil.EMPTY : password);
                    smb1AuthList.add(smb1Auth);
                } else if (Constants.FTP.equals(scheme)) {
                    final String hostname = paramMap.get(CRAWLER_FILE_AUTH + "." + fileAuthName + ".host");
                    final String port = paramMap.get(CRAWLER_FILE_AUTH + "." + fileAuthName + ".port");
                    final String username = paramMap.get(CRAWLER_FILE_AUTH + "." + fileAuthName + ".username");
                    final String password = paramMap.get(CRAWLER_FILE_AUTH + "." + fileAuthName + ".password");

                    if (StringUtil.isEmpty(username)) {
                        logger.warn("username is empty. fileAuth:" + fileAuthName);
                        continue;
                    }

                    final FtpAuthentication ftpAuth = new FtpAuthentication();
                    ftpAuth.setServer(hostname);
                    if (StringUtil.isNotBlank(port)) {
                        try {
                            ftpAuth.setPort(Integer.parseInt(port));
                        } catch (final NumberFormatException e) {
                            logger.warn("Failed to parse " + port, e);
                        }
                    }
                    ftpAuth.setUsername(username);
                    ftpAuth.setPassword(password == null ? StringUtil.EMPTY : password);
                    ftpAuthList.add(ftpAuth);
                }
            }
            if (!smbAuthList.isEmpty()) {
                factoryParamMap.put(SmbClient.SMB_AUTHENTICATIONS_PROPERTY,
                        smbAuthList.toArray(new SmbAuthentication[smbAuthList.size()]));
            }
            if (!smb1AuthList.isEmpty()) {
                factoryParamMap.put(org.codelibs.fess.crawler.client.smb1.SmbClient.SMB_AUTHENTICATIONS_PROPERTY,
                        smb1AuthList.toArray(
                                new org.codelibs.fess.crawler.client.smb1.SmbAuthentication[smb1AuthList.size()]));
            }
            if (!ftpAuthList.isEmpty()) {
                factoryParamMap.put(FtpClient.FTP_AUTHENTICATIONS_PROPERTY,
                        ftpAuthList.toArray(new FtpAuthentication[ftpAuthList.size()]));
            }
        }

        return factoryParamMap;
    }

    private AuthScheme getAuthScheme(final Map<String, String> paramMap, final String webAuthName,
            final String scheme) {
        AuthScheme authScheme = null;
        if (Constants.BASIC.equals(scheme)) {
            authScheme = new BasicScheme();
        } else if (Constants.DIGEST.equals(scheme)) {
            authScheme = new DigestScheme();
        } else if (Constants.NTLM.equals(scheme)) {
            final Properties props = new Properties();
            paramMap.entrySet().stream().filter(e -> e.getKey().startsWith("jcifs.")).forEach(e -> {
                props.setProperty(e.getKey(), e.getValue());
            });
            authScheme = new NTLMScheme(new JcifsEngine(props));
        } else if (Constants.FORM.equals(scheme)) {
            final String prefix = CRAWLER_WEB_AUTH + "." + webAuthName + ".";
            final Map<String, String> parameterMap = paramMap.entrySet().stream()
                    .filter(e -> e.getKey().startsWith(prefix))
                    .collect(Collectors.toMap(e -> e.getKey().substring(prefix.length()), e -> e.getValue()));
            authScheme = new FormScheme(parameterMap);
        }
        return authScheme;
    }

    private Credentials getCredentials(final String webAuthName, final String scheme,
            final Map<String, String> paramMap) {
        final String username = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".username");
        if (StringUtil.isEmpty(username)) {
            throw new CrawlerSystemException("username is empty. webAuth:" + webAuthName);
        }
        final String password = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".password");
        Credentials credentials;
        if (Constants.NTLM.equals(scheme)) {
            final String workstation = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".workstation");
            final String domain = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".domain");
            credentials = new NTCredentials(username, password == null ? StringUtil.EMPTY : password,
                    workstation == null ? StringUtil.EMPTY : workstation,
                    domain == null ? StringUtil.EMPTY : domain);
        } else {
            credentials = new UsernamePasswordCredentials(username, password == null ? StringUtil.EMPTY : password);
        }
        return credentials;
    }

    private AuthScope getAuthScope(final String webAuthName, final String scheme,
            final Map<String, String> paramMap) {
        final String hostname = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".host");
        final String port = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".port");
        final String realm = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".realm");
        AuthScope authScope;
        if (StringUtil.isBlank(hostname)) {
            authScope = AuthScope.ANY;
        } else {
            int p = AuthScope.ANY_PORT;
            if (StringUtil.isNotBlank(port)) {
                try {
                    p = Integer.parseInt(port);
                } catch (final NumberFormatException e) {
                    logger.warn("Failed to parse " + port, e);
                }
            }

            String r = realm;
            if (StringUtil.isBlank(realm)) {
                r = AuthScope.ANY_REALM;
            }

            String s = scheme;
            if (StringUtil.isBlank(scheme) || Constants.NTLM.equals(scheme)) {
                s = AuthScope.ANY_SCHEME;
            }
            authScope = new AuthScope(hostname, p, r, s);
        }
        return authScope;
    }

    @Override
    public Map<String, String> getConfigParameterMap(final ConfigName name) {
        return Collections.emptyMap();
    }

    @Override
    public String getId() {
        return asDocMeta().id();
    }

    public void setId(final String id) {
        asDocMeta().id(id);
    }

    public Long getVersionNo() {
        return asDocMeta().version();
    }

    public void setVersionNo(final Long version) {
        asDocMeta().version(version);
    }

    @Override
    public Integer getTimeToLive() {
        final String value = getHandlerParameterMap().get("timeToLive");
        if (StringUtil.isBlank(value)) {
            return null;
        }
        try {
            return Integer.parseInt(value);
        } catch (final NumberFormatException e) {
            if (logger.isDebugEnabled()) {
                logger.debug("Invalid format: " + value, e);
            }
        }
        return null;
    }

    @Override
    public String toString() {
        return "DataConfig [available=" + available + ", boost=" + boost + ", createdBy=" + createdBy
                + ", createdTime=" + createdTime + ", handlerName=" + handlerName + ", handlerParameter="
                + handlerParameter + ", handlerScript=" + handlerScript + ", name=" + name + ", permissions="
                + Arrays.toString(permissions) + ", sortOrder=" + sortOrder + ", updatedBy=" + updatedBy
                + ", updatedTime=" + updatedTime + "]";
    }

}