fr.gael.dhus.search.SolrDao.java Source code

Java tutorial

Introduction

Here is the source code for fr.gael.dhus.search.SolrDao.java

Source

/*
 * Data Hub Service (DHuS) - For Space data distribution.
 * Copyright (C) 2013,2014,2015 GAEL Systems
 *
 * This file is part of DHuS software sources.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package fr.gael.dhus.search;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;

import fr.gael.dhus.database.dao.UserDao;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.impl.HttpSolrServer.RemoteSolrException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.SpellCheckResponse;
import org.apache.solr.client.solrj.response.SpellCheckResponse.Suggestion;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;

import fr.gael.dhus.database.dao.ProductDao;
import fr.gael.dhus.database.object.MetadataIndex;
import fr.gael.dhus.database.object.Product;
import fr.gael.dhus.database.object.Role;
import fr.gael.dhus.database.object.User;
import fr.gael.dhus.search.geocoder.AbstractGeocoder;
import fr.gael.dhus.search.geocoder.Geocoder;
import fr.gael.dhus.search.geocoder.GeocoderFactory;
import fr.gael.dhus.service.SecurityService;
import fr.gael.dhus.system.config.ConfigurationManager;

/**
 * DAO over the Solr interface.
 *
 */
@Component
public class SolrDao {
    // Class-wide commons-logging logger.
    private static Log logger = LogFactory.getLog(SolrDao.class);

    // Database access to products (read by id, by id list and by path).
    @Autowired
    private ProductDao productDao;

    // Used to resolve the current user when a restricted query is built
    // without an explicit user.
    @Autowired
    private SecurityService securityService;

    // Source of the server port, the geocoder URL and the "data is public" flag.
    @Autowired
    private ConfigurationManager cfgManager;

    // Supplies the public-data name used when building user-restricted queries.
    @Autowired
    private UserDao userDao;

    /**
     * Default Geocoder, created lazily by {@link #getGeocoder()}.
     */
    private Geocoder geocoder;

    // Solr client shared by all instances; created lazily by getSolrServer().
    private static SolrServer solrServer = null;

    // Query-result cache keyed by query string, shared by all instances;
    // created lazily by getResultsCache().
    private static LoadingCache<String, SearchResult> cache = null;

    /**
     * Returns the geocoder used by this DAO. On first use the default
     * geocoder is built from the URL found in the geocoder configuration.
     * @return the geocoder instance.
     */
    public Geocoder getGeocoder() {
        if (geocoder != null) {
            return geocoder;
        }
        geocoder = GeocoderFactory.getDefault(cfgManager.getGeocoderConfiguration().getUrl());
        return geocoder;
    }

    /**
     * Gives access to the shared query-result cache, building it the first
     * time it is requested. The cache holds at most 1000 entries, each one
     * expiring 10 minutes after it was written or last accessed. A missing
     * entry is loaded by running the key as a query on the solr server.
     * @return the query-result cache.
     */
    private LoadingCache<String, SearchResult> getResultsCache() {
        if (SolrDao.cache != null) {
            return SolrDao.cache;
        }

        CacheLoader<String, SearchResult> loader = new CacheLoader<String, SearchResult>() {
            @Override
            public SearchResult load(String key) {
                // Only the first 512 characters of the query are logged.
                String printable = String.valueOf(key);
                int shown = Math.min(printable.length(), 512);
                logger.info("Executing Query \"" + printable.substring(0, shown) + " (...)\"");
                return new SearchResult(getSolrServer(), key);
            }
        };

        SolrDao.cache = CacheBuilder.newBuilder().concurrencyLevel(4).maximumSize(1000)
                .expireAfterWrite(10, TimeUnit.MINUTES).expireAfterAccess(10, TimeUnit.MINUTES)
                .build(loader);
        return SolrDao.cache;
    }

    /**
     * Empties the query-result cache, if it has already been created:
     * pending maintenance is performed, then every entry is invalidated.
     */
    public static void resetQueryCache() {
        LoadingCache<String, SearchResult> current = SolrDao.cache;
        if (current == null) {
            // Nothing has been cached yet.
            return;
        }
        current.cleanUp();
        current.invalidateAll();
    }

    /**
     * Retrieves the online solr server, creating the shared client on first
     * use. The dhus solr server is expected at
     *    http://localhost:port/solr/dhus
     * where the port comes from the server configuration.
     * @return the solr server instance.
     */
    private SolrServer getSolrServer() {
        if (solrServer != null) {
            return solrServer;
        }
        int port = cfgManager.getServerConfiguration().getPort();
        solrServer = new HttpSolrServer("http://localhost:" + port + "/solr/dhus");
        return solrServer;
    }

    /**
     * Saves the metadata indexes of a product in the solr service.
     * The solr document already stored for the product path is reused when
     * it exists, otherwise a new document is created. Once the document has
     * been added and committed, the query cache is reset.
     * @param product the product to index; its path and id identify the
     *    solr document.
     * @param indexes the metadata indexes to store in the document.
     */
    public void saveIndex(Product product, List<MetadataIndex> indexes) {
        String productPath = ProductDao.getPathFromProduct(product);
        SolrServer server = getSolrServer();

        // Prepare the document: start from the indexed one when present.
        SolrInputDocument doc;
        SolrDocument ro_doc = getDocumentByPath(productPath);
        if (ro_doc != null) {
            logger.info("Adding or updating fields in solr path '" + productPath + "'");
            doc = ClientUtils.toSolrInputDocument(ro_doc);
        } else {
            doc = getInputDocByPath(productPath, product.getId());
        }

        // getInputDocByPath gives null when the product cannot be resolved.
        if (doc == null) {
            logger.error("Cannot prepare solr document for path '" + productPath + "': index not saved.");
            return;
        }

        // ingest indexes
        for (MetadataIndex index : indexes) {
            String type = index.getType();
            // Only textual information stored in index
            if ((type == null) || type.isEmpty() || "text/plain".equals(type)) {
                updateField(doc, "contents", index.getValue(), true);
            }

            if (index.getQueryable() != null) {
                // The queryable name itself is searchable through "contents".
                updateField(doc, "contents", index.getQueryable(), true);

                // Dedicated field named after the lower-cased queryable;
                // any previous value is replaced.
                updateField(doc, index.getQueryable().toLowerCase(), index.getValue(), false);

                if (logger.isDebugEnabled()) {
                    logger.debug("Added " + index.getQueryable() + ":" + index.getValue());
                }
            }
        }

        try {
            server.add(doc);
            server.commit();
        } catch (SolrServerException e) {
            logger.error("Cannot save index changes in solr.", e);
            return;
        } catch (IOException e) {
            // Not expected; logged like the other solr errors of this class.
            logger.error("IO error while saving index changes in solr.", e);
        }
        resetQueryCache();
    }

    /**
     * Deletes the solr document whose id is the id of the given product,
     * commits the deletion and resets the query cache. Solr and IO failures
     * are logged; the cache is reset in every case.
     * @param product the product whose indexes must be removed.
     */
    public void removeIndexes(Product product) {
        final SolrServer solr = getSolrServer();
        try {
            String documentId = product.getId().toString();
            solr.deleteById(documentId);
            solr.commit();
        } catch (IOException e) {
            logger.error("IO error.", e);
        } catch (SolrServerException e) {
            logger.error("Problem accessing the solr server.", e);
        }
        resetQueryCache();
    }

    /**
     * Processes Solr index optimization.
     * Shall be called asynchronously to avoid latencies.
     * This is a best-effort operation: a failure is logged and never
     * propagated to the caller.
     */
    public void optimize() {
        SolrServer server = getSolrServer();
        try {
            server.optimize();
        } catch (Exception e) {
            // Best effort: report the problem instead of hiding it.
            logger.warn("Cannot optimize solr index.", e);
        }
    }

    /**
     * Changes the path stored in solr for the document of the given id,
     * commits the change and resets the query cache.
     * @param id the id of the document to relocate.
     * @param new_path the new product path.
     * @throws DHusSearchException if no document matches the id, or if solr
     *    refuses the update.
     */
    public void relocate(Long id, String new_path) {
        SolrDocument doc = getDocumentById(id);

        if (doc == null) {
            throw new DHusSearchException("Cannot retrieve document id(" + id + ") to be relocated.");
        }

        SolrInputDocument input = ClientUtils.toSolrInputDocument(doc);
        // The new value is wrapped in a "set" modifier map.
        Map<String, Object> partialUpdate = new HashMap<String, Object>();
        partialUpdate.put("set", toSolrPath(new_path));
        updateField(input, "path", partialUpdate, false);

        SolrServer server = getSolrServer();
        try {
            server.add(input);
            server.commit();
        } catch (SolrServerException e) {
            throw new DHusSearchException("Cannot save product path changes in solr.", e);
        } catch (IOException e) {
            // Not expected; logged like the other solr errors of this class.
            logger.error("IO error while relocating document id(" + id + ") in solr.", e);
        }
        resetQueryCache();
    }

    /**
     * Counts the documents stored in this solr server by running an
     * unrestricted match-all query.
     * @return number of documents.
     */
    public long getDocumentsNumber() {
        SearchResult everything = search("*:*", false, null);
        return everything.size();
    }

    /**
     * Counts the documents matched by a query run in restricted mode for
     * the given user.
     * @param query the query to run.
     * @param u the user the query is restricted to; may be null.
     * @return the size of the search result.
     */
    public long count(String query, User u) {
        SearchResult matches = search(query, true, u);
        return matches.size();
    }

    /**
     * Looks for the solr document of the given id with an unrestricted
     * search.
     * @param id the document id.
     * @return the first matching document, or null when nothing matches or
     *    when the search could not be performed.
     */
    private SolrDocument getDocumentById(Long id) {
        SearchResult sr = search("id:" + id, false, null);
        // search() gives null when the results cannot be retrieved.
        if (sr != null && sr.hasNext()) {
            return sr.next();
        }
        return null;
    }

    /**
     * Looks for the solr document indexed under the given product path.
     * The lookup goes through {@link #searchNoFilterQuery(String)}.
     * @param productPath the product path, in its external form.
     * @return the first matching document, or null when nothing matches.
     */
    private SolrDocument getDocumentByPath(String productPath) {
        String pathQuery = "path:" + toSolrPath(productPath);
        SearchResult matches = searchNoFilterQuery(pathQuery);
        return matches.hasNext() ? matches.next() : null;
    }

    /**
     * Runs a restricted search without an explicit user.
     * @param squery the query to run.
     * @return the search result, as given by
     *    {@link #search(String, boolean, User)}.
     */
    public SearchResult search(String squery) {
        final boolean restricted = true;
        final User noExplicitUser = null;
        return search(squery, restricted, noExplicitUser);
    }

    /**
     * Runs a query through the result cache.
     * The solr server is pinged first, then the query is rewritten by
     * {@link #updateQuery(String)}. Unless the system property
     * "solr.filter.user" is "true", a restricted search is further narrowed
     * by {@link #getRestrictedQuery(String, User)}. The returned result is
     * rewound to its first document.
     * @param squery the query to run.
     * @param restricted whether the query must be restricted to the user.
     * @param user the user the query is restricted to; may be null.
     * @return the search result, or null when the results cannot be
     *    retrieved from the cache.
     * @throws DHusSearchException if the solr server does not answer the
     *    ping.
     */
    public SearchResult search(String squery, boolean restricted, User user) {
        try {
            if (getSolrServer().ping() == null) {
                throw new DHusSearchException("Solr Server not ready.");
            }
        } catch (DHusSearchException e) {
            // Already the right type: do not wrap it a second time.
            throw e;
        } catch (Exception e) {
            // Same message as before, the cause is now preserved.
            throw new DHusSearchException(e.getMessage(), e);
        }

        SearchResult docs;
        try {
            logger.info("Updating query \"" + abbreviateForLog(squery) + " (...)\"");
            String query = updateQuery(squery);

            logger.debug("Looking for docs...");
            logger.info("Searching for \"" + abbreviateForLog(query) + " (...)\"");

            boolean accessFilterActif = Boolean.parseBoolean(System.getProperty("solr.filter.user", "false"));
            if (accessFilterActif) {
                // The query is used as-is when the "solr.filter.user" property is set.
                docs = getResultsCache().get(query);
            } else {
                docs = getResultsCache().get(restricted ? getRestrictedQuery(query, user) : query);
            }

            // Rewind the iterator to the beginning
            docs.setOffset(0);
        } catch (ExecutionException e) {
            logger.error("Cannot retrieve results for query \"" + squery + "\".", e);
            return null;
        }
        return docs;
    }

    /**
     * Shortens a text to its first 512 characters for logging purposes.
     * A null text is rendered as "null".
     */
    private static String abbreviateForLog(String text) {
        String printable = String.valueOf(text);
        return printable.substring(0, Math.min(printable.length(), 512));
    }

    /**
     * Retrieves suggested values from solr through the "/suggest" handler
     * with spell checking switched on.
     * @param prefix the string parsed by solr to retrieve suggestions.
     * @return the alternatives of the last suggestion, or an empty list when
     *    solr fails, when no spell check response is available, when the
     *    prefix is correctly spelled or when there is no suggestion.
     */
    public List<String> getSuggestions(String prefix) {
        ModifiableSolrParams request = new ModifiableSolrParams();
        request.set("qt", "/suggest");
        request.set("q", prefix);
        request.set("spellcheck", "on");

        QueryResponse response;
        try {
            response = getSolrServer().query(request);
        } catch (SolrServerException e) {
            logger.warn("Cannot get suggestion for prefix \"" + prefix + "\".");
            return ImmutableList.of();
        }

        SpellCheckResponse spellCheck = response.getSpellCheckResponse();
        if (spellCheck == null || spellCheck.isCorrectlySpelled()) {
            return ImmutableList.of();
        }

        List<Suggestion> candidates = spellCheck.getSuggestions();
        if (candidates.isEmpty()) {
            return ImmutableList.of();
        }
        // Returns only the last suggestion...
        Suggestion last = candidates.get(candidates.size() - 1);
        return last.getAlternatives();
    }

    /**
     * Restricts the query with the allowed username field.
     * The restriction is only added when data is not public, a user is
     * known, and this user is not a data manager.
     * @param query the query to restrict.
     * @param user the user to restrict to; when null, the current user of
     *    the security service is used.
     * @return the query wrapped in parentheses, followed by the user
     *    restriction when one applies.
     */
    public String getRestrictedQuery(String query, User user) {
        User effectiveUser = (user != null) ? user : securityService.getCurrentUser();

        // Bypass for Data Right Managers. They can see all products and collections.
        boolean filterByUser = !cfgManager.isDataPublic() && effectiveUser != null
                && !effectiveUser.getRoles().contains(Role.DATA_MANAGER);

        String userClause = "";
        if (filterByUser) {
            // NOTE(review): the user name is inserted unescaped between
            // quotes - confirm that names cannot contain '"'.
            String owner = effectiveUser.getUsername();
            String everyone = userDao.getPublicDataName();
            userClause = "AND (user:(\"" + owner + "\" OR \"" + everyone + "\"))";
        }
        return "(" + query + ") " + userClause;
    }

    /**
     * Reads from the database the products referenced by solr documents.
     * @param docs the solr documents; the "id" field of each one is read as
     *    a Long.
     * @return the products read by the product DAO for these ids.
     */
    public List<Product> getProductListByDocList(List<SolrDocument> docs) {
        List<Long> productIds = new ArrayList<Long>(docs.size());
        for (SolrDocument current : docs) {
            Long productId = (Long) current.get("id");
            productIds.add(productId);
        }
        return productDao.read(productIds);
    }

    /**
     * Retrieves the database product referenced by a solr document.
     * The product is first read by the "id" field of the document; when
     * this fails with an exception, the "path" field is used instead.
     * @param doc the solr document.
     * @return the product, or null when it cannot be retrieved.
     */
    public Product getProductByDoc(SolrDocument doc) {
        try {
            long id = (Long) doc.get("id");
            return productDao.read(id);
        } catch (Exception e) {
            logger.error("Cannot retrieve product by its solr Id (trying by path).", e);
        }

        // Fallback by path: only possible when the document holds a textual path.
        Object rawPath = (doc == null) ? null : doc.get("path");
        if (!(rawPath instanceof String)) {
            logger.error("Cannot retrieve product by path: no path available in solr document.");
            return null;
        }

        String externalPath = toExternalPath((String) rawPath);
        try {
            return productDao.getProductByPath(new URL(externalPath));
        } catch (MalformedURLException mfu) {
            logger.error("Bad path \"" + externalPath + "\".", mfu);
            return null;
        }
    }

    /**
     * Substitutions applied to product paths before they are stored in
     * solr: each key is replaced by its value by toSolrPath, and restored
     * by toExternalPath. The map is immutable.
     */
    private static final Map<String, String> special_keys = ImmutableMap.of(":", "<colon>");
    /*
     * Substitutions that are currently disabled:
     *    ";"  -> "<semicolon>"
     *    "-"  -> "<minus>"
     *    "+"  -> "<plus>"
     *    "_"  -> "<underscore>"
     *    "~"  -> "<tilde>"
     *    "^"  -> "<circ>"
     *    " "  -> "<space>"
     *    "/"  -> "<slash>"
     *    "\\" -> "<backslash>"
     *    "]"  -> "<closebracket>"
     *    "["  -> "<openbracket>"
     *    "&"  -> "<amp>"
     *    "*"  -> "<asterisk>"
     */

    /**
     * Converts a product path to the form stored in solr: a path starting
     * with "/" is prefixed with "file:/", then every special key is
     * replaced by its substitute.
     * @param path the external path.
     * @return the path as stored in solr.
     */
    private String toSolrPath(String path) {
        logger.debug("Converting " + path);
        String converted = path.startsWith("/") ? "file:/" + path : path;
        for (Map.Entry<String, String> substitution : special_keys.entrySet()) {
            converted = converted.replace(substitution.getKey(), substitution.getValue());
        }
        logger.debug("   to " + converted);
        return converted;
    }

    /**
     * Converts a path read from solr back to its external form by
     * replacing every substitute with the special key it stands for.
     * @param path the path as stored in solr.
     * @return the external path.
     */
    private String toExternalPath(String path) {
        logger.debug("Converting " + path);
        String restored = path;
        for (Map.Entry<String, String> substitution : special_keys.entrySet()) {
            restored = restored.replace(substitution.getValue(), substitution.getKey());
        }
        logger.debug("   to " + restored);
        return restored;
    }

    /**
     * Grants a user access to a product in the solr index. The update is
     * attempted up to 5 times when solr reports a concurrency conflict.
     * @param p the product.
     * @param username the user to grant.
     * @throws DHusSearchException if the conflict persists after the last
     *    attempt, or if solr reports any other remote error.
     */
    public void addUserRight(Product p, String username) {
        int tries = 5;
        while (tries > 0) {
            try {
                _addUserRight(p, username);
                return;
            } catch (RemoteSolrException e) {
                // Only conflicts are worth retrying: any other remote error
                // would fail again and keep this loop running forever.
                if (ErrorCode.getErrorCode(e.code()) != ErrorCode.CONFLICT) {
                    throw new DHusSearchException(
                            "Solr error while adding user rights (product id#" + p.getId() + ")", e);
                }

                // Case of conflict access
                tries--;
                String message = "Solr concurrency access conflict detected " + "(product id#" + p.getId()
                        + ")";
                if (tries == 0) {
                    throw new DHusSearchException(message, e);
                }
                logger.warn(message + ", retring ...");
            }
        }
    }

    /**
     * Adds a user to the "user" field of the solr document of a product,
     * unless the user is already listed. Nothing is done, apart from a
     * warning, when the product is not found in solr. Remote solr errors
     * are not caught here so that {@link #addUserRight(Product, String)}
     * can retry.
     * @param p the product.
     * @param username the user to grant.
     */
    public void _addUserRight(Product p, String username) {
        SolrServer server = getSolrServer();
        String path = ProductDao.getPathFromProduct(p);
        SolrDocument doc = getDocumentByPath(path);

        if (doc == null) {
            logger.warn("Cannot retrieve Product in solr to set user rights with path \"" + path + "\"");
            return;
        }

        // Null when the document has no "user" field yet.
        Collection<Object> users = doc.getFieldValues("user");
        if (users != null && users.contains(username)) {
            // Case of user already agreed
            return;
        }

        List<Object> list = users == null ? new ArrayList<Object>() : new ArrayList<Object>(users);
        list.add(username);

        SolrInputDocument new_doc = ClientUtils.toSolrInputDocument(doc);
        // The complete user list is written with a "set" modifier.
        new_doc.setField("user", ImmutableMap.of("set", list));
        try {
            server.add(new_doc);
            server.commit();
        } catch (SolrServerException e) {
            logger.error("Cannot add user product rights for user \"" + username + "\"", e);
        } catch (IOException e) {
            logger.error("IO error while adding user product rights for user \"" + username + "\"", e);
        }
    }

    /**
     * Removes a user from the "user" field of the solr document of a
     * product. Nothing is done when the product is not found in solr or
     * when the user is not listed.
     * @param p the product.
     * @param username the user to remove.
     */
    public void removeUserRight(Product p, String username) {
        SolrServer server = getSolrServer();
        String path = ProductDao.getPathFromProduct(p);
        SolrDocument doc = getDocumentByPath(path);
        if (doc == null) {
            return;
        }

        // Null when the document has no "user" field.
        Collection<Object> users = doc.getFieldValues("user");
        if (users == null || !users.contains(username)) {
            return;
        }

        // Removes the first occurrence of the user only.
        List<Object> list = new ArrayList<Object>(users);
        list.remove(username);

        SolrInputDocument new_doc = ClientUtils.toSolrInputDocument(doc);
        // The remaining user list is written with a "set" modifier.
        new_doc.setField("user", ImmutableMap.of("set", list));
        try {
            server.add(new_doc);
            server.commit();
        } catch (SolrServerException e) {
            logger.error("Cannot remove user rights for \"" + username + "\"", e);
        } catch (IOException e) {
            logger.error("IO error while removing user rights for \"" + username + "\"", e);
        }
    }

    /**
     * Lists the users found in the "user" field of the solr document of a
     * product, without duplicates.
     * @param p the product.
     * @return the user names, in no particular order; empty when the
     *    product is not found in solr or has no "user" field.
     */
    public List<String> getAuthorizedUsers(Product p) {
        String path = ProductDao.getPathFromProduct(p);
        SolrDocument doc = getDocumentByPath(path);

        HashSet<String> list = new HashSet<String>();
        // The document is null when the product is not indexed.
        Collection<Object> users = (doc == null) ? null : doc.getFieldValues("user");
        if (users != null) {
            for (Object o : users) {
                list.add(o.toString());
            }
        }
        return new ArrayList<String>(list);
    }

    /**
     * Creates a new solr input document holding the "path" and "id" fields
     * of a product.
     * @param path the external product path.
     * @param id the product id; when null, the id is looked up in the
     *    database by the product path.
     * @return the new document, or null when the id has to be looked up
     *    and the path is not a valid URL or is unknown to the database.
     */
    private SolrInputDocument getInputDocByPath(String path, Long id) {
        SolrInputDocument doc = new SolrInputDocument();
        doc.setField("path", toSolrPath(path));

        if (id != null) {
            doc.setField("id", id);
            return doc;
        }

        try {
            Product p = productDao.getProductByPath(new URL(path));
            if (p == null) {
                // No product to take the id from: give up like for a bad path.
                logger.error("Path \"" + path + "\" not found in database.");
                return null;
            }
            doc.setField("id", p.getId());
        } catch (MalformedURLException e) {
            logger.error("Unknown product path " + path);
            return null;
        }
        return doc;
    }

    /**
     * Manages field addition or replacement. The document provided in input
     * shall be committed by the caller.
     * @param doc the document to modify.
     * @param field the name of the field.
     * @param value the value to store.
     * @param update when true the value is added to the field; when false
     *    an existing field is removed first, so that the value replaces it.
     */
    private void updateField(SolrInputDocument doc, String field, Object value, boolean update) {
        boolean replaceExisting = !update;
        if (replaceExisting && doc.containsKey(field)) {
            doc.remove(field);
        }
        doc.addField(field, value);
    }

    /**
     * Rewrites a user query before it is sent to solr. For every
     * field/value pair given by the query parser:
     * - a non-empty field name is replaced by its lower case version;
     * - a value without field name that does not look like query syntax
     *   (brackets, wildcards, "TO", "OR", "AND", digits) and for which solr
     *   has no suggestion is looked up in the geocoder; when boundaries are
     *   found, the value is replaced by "(value OR footprint:...)".
     * @param query the query to rewrite.
     * @return the rewritten query.
     */
    public String updateQuery(String query) {
        for (String[] parsed : SolrQueryParser.parse(query)) {
            final String field = parsed[SolrQueryParser.INDEX_FIELD];
            final String value = parsed[SolrQueryParser.INDEX_VALUE];

            // If key defined, replace it by its lower case version.
            if (!"".equals(field)) {
                query = query.replace(field, field.toLowerCase());
                continue;
            }

            boolean looksLikeSyntax = value.startsWith("{") || value.startsWith("[") || value.startsWith("(")
                    || value.contains("*") || value.contains("?") || value.contains("TO") || value.contains("OR")
                    || value.contains("AND") || value.matches(".*\\d.*");
            // Solr is only asked for suggestions when the cheap checks did not match.
            if (looksLikeSyntax || !getSuggestions(value).isEmpty()) {
                continue;
            }

            String boundaries = null;
            try {
                boundaries = ((AbstractGeocoder) getGeocoder()).getCachedBoundariesWKT(value);
            } catch (ExecutionException e) {
                logger.error("Cannot get boundaries of \"" + value + "\"");
            }
            if (boundaries == null) {
                continue;
            }

            String locate = "(" + value + " OR footprint:\"Intersects(" + boundaries + ") distErrPct=0\")";
            query = query.replace(value, locate).trim();
        }
        return query;
    }

    /**
     * Adds a collection name to the "collection" field of the solr document
     * of a product, unless it is already listed. Nothing is done, apart
     * from a warning, when the product is not found in solr.
     * @param p the product.
     * @param cname the name of the collection.
     */
    public void addProductInCollection(Product p, String cname) {
        SolrServer server = getSolrServer();
        String path = ProductDao.getPathFromProduct(p);
        SolrDocument doc = getDocumentByPath(path);
        if (doc == null) {
            logger.warn("Cannot retrieve Product in solr to set collection with path \"" + path);
            return;
        }

        // Null when the document has no "collection" field yet.
        Collection<Object> collections = doc.getFieldValues("collection");
        if (collections != null && collections.contains(cname)) {
            // Case of collection already set
            return;
        }

        SolrInputDocument new_doc = ClientUtils.toSolrInputDocument(doc);
        // The name is appended with an "add" modifier.
        new_doc.setField("collection", ImmutableMap.of("add", cname));

        try {
            server.add(new_doc);
            server.commit();
        } catch (SolrServerException e) {
            logger.error("Cannot add product in collection \"" + cname + "\"", e);
        } catch (IOException e) {
            logger.error("IO error while adding product in collection \"" + cname + "\"", e);
        }
    }

    /**
     * Removes a collection name from the "collection" field of the solr
     * document of a product. Nothing is done when the product is not found
     * in solr (an error is logged) or when the collection is not listed.
     * @param p the product.
     * @param cname the name of the collection.
     */
    public void removeProductFromCollection(Product p, String cname) {
        SolrServer server = getSolrServer();
        String path = ProductDao.getPathFromProduct(p);
        SolrDocument doc = getDocumentByPath(path);
        if (doc == null) {
            logger.error("Cannot retrieve Product to remove collection with path \"" + path);
            return;
        }

        // Null when the document has no "collection" field.
        Collection<Object> collections = doc.getFieldValues("collection");
        if (collections == null || !collections.contains(cname)) {
            return;
        }

        // Removes the first occurrence of the collection only.
        List<Object> list = new ArrayList<Object>(collections);
        list.remove(cname);

        SolrInputDocument new_doc = ClientUtils.toSolrInputDocument(doc);
        // The "remove" modifier seems not to work: the remaining list is
        // written with a "set" modifier instead.
        new_doc.setField("collection", ImmutableMap.of("set", list));
        try {
            server.add(new_doc);
            server.commit();
        } catch (SolrServerException e) {
            logger.error("Cannot remove product from collection \"" + cname + "\"", e);
        } catch (IOException e) {
            logger.error("IO error while removing product from collection \"" + cname + "\"", e);
        }
    }

    /**
     * Checks every document of the solr index against the database: a
     * document whose product cannot be retrieved is removed from the search
     * result. The query cache is reset when at least one document has been
     * removed.
     */
    public void checkIndexes() {
        SolrServer server = getSolrServer();
        SearchResult sr = search("*:*", false, null);
        // search() gives null when the results cannot be retrieved.
        if (sr == null) {
            logger.error("Cannot check Solr indexes: unable to list indexed documents.");
            return;
        }

        boolean changes = false;
        while (sr.hasNext()) {
            SolrDocument doc = sr.next();
            Product product = getProductByDoc(doc);
            if (product == null) {
                Object o = doc.get("path");
                String path = "unknown";
                if (o != null) {
                    path = toExternalPath((String) o);
                }
                try {
                    // Presumably deletes the current document from solr - TODO confirm in SearchResult.
                    sr.remove();
                    logger.warn("Product \"" + path + "\" present in Solr Index but not in database: removed.");
                    changes = true;
                } catch (Exception e) {
                    logger.error("Cannot remove Solr entry " + path, e);
                }
            }
            // NOTE(review): the commit is issued for every document, as
            // before; it may be enough to commit after removals only.
            try {
                server.commit();
            } catch (Exception e) {
                logger.error("Cannot commit Solr changes.", e);
            }
        }
        if (changes) {
            resetQueryCache();
        }
    }

    /**
     * Searches all products, bypassing the default query filter that hides
     * the products under processing, so that the search also reaches
     * products that are not processed yet.
     * This call does not care about user rights. It is also not handled by
     * the search cache. The fetch size used is 10.
     * @param query the product request query.
     * @return search result iterator.
     */
    private SearchResult searchNoFilterQuery(String query) {
        final int fetchSize = 10;
        SolrServer solr = getSolrServer();
        return new SearchResult(solr, query, fetchSize, "*");
    }

    /**
     * Sets the "processed" field of the solr document of a product to true
     * and commits the change. Nothing is done, apart from a warning, when
     * the product is not found in solr.
     * @param p the product to flag as processed.
     */
    public void setProcessed(Product p) {
        SolrServer server = getSolrServer();
        String path = ProductDao.getPathFromProduct(p);
        SolrDocument doc = getDocumentByPath(path);
        if (doc == null) {
            logger.warn("Cannot retrieve Product in solr to set it processed with path \"" + path);
            return;
        }

        SolrInputDocument new_doc = ClientUtils.toSolrInputDocument(doc);
        new_doc.setField("processed", true);

        try {
            server.add(new_doc);
            server.commit();
        } catch (SolrServerException e) {
            logger.error("Cannot set product processed", e);
        } catch (IOException e) {
            logger.error("IO error while setting product processed", e);
        }
    }
}