de.fhg.iais.cortex.search.IndexerImpl.java Source code

Java tutorial

Introduction

Here is the source code for de.fhg.iais.cortex.search.IndexerImpl.java

Source

package de.fhg.iais.cortex.search;

/******************************************************************************
 * Copyright 2011 (c) Fraunhofer IAIS Netmedia  http://www.iais.fraunhofer.de *
 * ************************************************************************** *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may    *
 * not use this file except in compliance with the License.                   *
 * You may obtain a copy of the License at                                    *
 * http://www.apache.org/licenses/LICENSE-2.0                                 *
 * Unless required by applicable law or agreed to in writing,                 *
 * software distributed under the License is distributed on an "AS IS" BASIS, *
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   *
 * See the License for the specific language governing permissions and        *
 * limitations under the License.                                             *
 ******************************************************************************/

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import javax.inject.Inject;
import javax.inject.Named;

import org.apache.commons.lang.StringUtils;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrInputDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Lists;

import de.fhg.iais.commons.annotation.UsedBy;
import de.fhg.iais.commons.time.StopWatch;
import de.fhg.iais.cortex.model.aip.AipDomType;
import de.fhg.iais.cortex.search.exception.IndexerException;
import de.fhg.iais.cortex.search.utils.FieldPreprocessor;
import de.fhg.iais.cortex.search.utils.SolrDocumentCreator;

@UsedBy("guice")
public class IndexerImpl implements IIndexer {

    /** Class-wide logger; static so it is obtained once instead of once per indexer instance. */
    private static final Logger LOG = LoggerFactory.getLogger(IndexerImpl.class);

    /** Maximum number of ids that {@link #deleteByIds(List)} puts into one delete request. */
    public static final long DELETE_BATCH_SIZE = 1000;

    /** Solr client that receives every add, commit and delete request issued by this indexer. */
    private final SolrServer solrServer;

    /** Helper that writes values into Solr input documents; can be swapped via the setter. */
    private SolrDocumentCreator documentCreator;

    /** Upper bound, in characters, for the title that is written to the sort field. */
    private final int maxSortFieldLength = 5000;

    /**
     * {@link IIndexerDocument} that collects its content in one {@link SolrInputDocument}.
     * <p>
     * Each mutator delegates to the document creator that the enclosing indexer holds at the time of the
     * call. Several text fields are written twice: once as given and once in a preprocessed form. Nothing is
     * sent to Solr before {@link #addToIndex()} is called.
     */
    private class Document implements IIndexerDocument {

        /** Preprocessor used for ordinary text fields (default configuration). */
        private final FieldPreprocessor textPreprocessor = new FieldPreprocessor();

        /** Preprocessor used for facet values, configured with an explicit list of replacement patterns. */
        private final FieldPreprocessor facetPreprocessor = new FieldPreprocessor(
                FieldPreprocessor.REPLACE_ON_TWO_SYMBOLS_PATTERN,
                FieldPreprocessor.REPLACE_ON_SYMBOL_AT_START_PATTERN,
                FieldPreprocessor.REPLACE_ON_SYMBOL_AT_END_PATTERN,
                FieldPreprocessor.REPLACE_ON_SYMBOL_WITH_WORDCHAR_PATTERN,
                FieldPreprocessor.REPLACE_ON_WORDCHAR_WITH_SYMBOL_PATTERN,
                FieldPreprocessor.REPLACE_ON_DOT_SYMBOL_PATTERN,
                FieldPreprocessor.REPLACE_ON_ALL_SYMBOL_WITH_MINUS_SYMBOL_PATTERN,
                FieldPreprocessor.REPLACE_ON_MINUS_SYMBOL_WITH_ALL_SYMBOL_PATTERN,
                FieldPreprocessor.REPLACE_ON_MINUS_SYMBOL_AT_START_PATTERN);

        /** The Solr document that is filled by the methods below. */
        private final SolrInputDocument solrDocument;

        /**
         * Creates an empty Solr document and lets the document creator set its id.
         *
         * @param id the id to store in the new document
         */
        public Document(String id) {
            SolrInputDocument fresh = new SolrInputDocument();
            this.solrDocument = fresh;
            creator().setId(fresh, id);
        }

        /** Returns the document creator the enclosing indexer holds right now (it may be replaced later). */
        private SolrDocumentCreator creator() {
            return IndexerImpl.this.documentCreator;
        }

        /** Stores an already preprocessed value for the given field. */
        private void addPreprocessed(String fieldName, String preprocessedValue) {
            creator().addPreprocessedField(this.solrDocument, fieldName, preprocessedValue);
        }

        /** Runs the facet preprocessor over the given text. */
        private String preprocessFacet(String text) {
            return this.facetPreprocessor.preprocess(text, "\\s");
        }

        /** Runs the default text preprocessor over the given text. */
        private String preprocess(String text) {
            return this.textPreprocessor.preprocess(text, "\\s");
        }

        /** Adds the preview plus its preprocessed variant. */
        @Override
        public void addPreview(String preview) {
            creator().addPreview(this.solrDocument, preview);
            addPreprocessed(SolrFields.PREVIEW, preprocess(preview));
        }

        /** Adds the aggregation text: markup removed, pruned to 1024 characters, plus preprocessed variant. */
        @Override
        public void addAggregation(String preview) {
            final String plainText = stripHtmlAndPrune(preview, 1024);
            creator().addAggregationField(this.solrDocument, plainText);
            addPreprocessed(SolrFields.AGGREGGATION, preprocess(plainText));
        }

        /** Adds the view plus its preprocessed variant. */
        @Override
        public void addView(String view) {
            creator().addView(this.solrDocument, view);
            addPreprocessed(SolrFields.VIEW, preprocess(view));
        }

        /** Adds a full text value under the given field name plus its preprocessed variant. */
        @Override
        public void addFulltext(String fieldName, String text) {
            creator().addField(this.solrDocument, fieldName, text);
            addPreprocessed(fieldName, preprocess(text));
        }

        /** Adds a facet value plus a variant cleaned by the facet preprocessor. */
        @Override
        public void addFacetField(String field, String value) {
            creator().addFacetField(this.solrDocument, field, value);
            addPreprocessed(field, preprocessFacet(value));
        }

        /** Adds the item label plus its preprocessed variant. */
        @Override
        public void addLabel(String itemLabel) {
            creator().addLabel(this.solrDocument, itemLabel);
            addPreprocessed(SolrFields.LABEL, preprocess(itemLabel));
        }

        /** Preprocesses the value with the default preprocessor and stores only that variant. */
        @Override
        public void addPreprocessedField(String name, String value) {
            addPreprocessed(name, preprocess(value));
        }

        /** Passes the coordinate pair on to the document creator. */
        @Override
        public void addGeocode(String latitude, String longitude) {
            creator().addGeocode(this.solrDocument, latitude, longitude);
        }

        /** Passes the location display name on to the document creator. */
        @Override
        public void addLocationDisplayName(String locationDisplayName) {
            creator().addLocationDisplayName(this.solrDocument, locationDisplayName);
        }

        /** Stores the provider id; null and empty ids are ignored. */
        @Override
        public void addProviderId(String providerId) {
            if (StringUtils.isEmpty(providerId)) {
                return;
            }
            creator().addField(this.solrDocument, SolrFields.PROVIDER_ID, providerId);
        }

        /** Stores the revision id; null and empty ids are ignored. */
        @Override
        public void addRevisionId(String revisionId) {
            if (StringUtils.isEmpty(revisionId)) {
                return;
            }
            creator().addField(this.solrDocument, SolrFields.REVISION_ID, revisionId);
        }

        /** Stores the ingest id as given (no emptiness check, unlike provider and revision id). */
        @Override
        public void addIngestId(String ingestId) {
            creator().addField(this.solrDocument, SolrFields.INGEST_ID, ingestId);
        }

        /**
         * Sends the collected document to the Solr server. No commit is issued here.
         *
         * @throws IndexerException wrapping any Solr or I/O failure
         */
        @Override
        public void addToIndex() {
            try {
                IndexerImpl.this.solrServer.add(this.solrDocument);
            } catch (IOException ioFailure) {
                throw new IndexerException(ioFailure);
            } catch (SolrServerException solrFailure) {
                throw new IndexerException(solrFailure);
            }
        }

        /** Stores the preview and writes the title, cut to the maximum sort field length, to the sort field. */
        @Override
        public void storePreview(String preview, String title) {
            creator().addField(this.solrDocument, SolrFields.PREVIEW_STORE, preview);

            // NOTE(review): a null title fails here after the preview has already been added
            final int sortLength = Math.min(IndexerImpl.this.maxSortFieldLength, title.length());
            creator().addField(this.solrDocument, SolrFields.SORT, title.substring(0, sortLength));
        }

        /** Stores the category; institution documents additionally get a document boost of 0.1. */
        @Override
        public void setCategory(String category) {
            final boolean isInstitution = AipDomType.Institution.name().equals(category);
            if (isInstitution) {
                creator().setDocumentBoost(this.solrDocument, 0.1f);
            }
            creator().addField(this.solrDocument, SolrFields.CATEGORY, category);
        }

        /** Adds the institution name plus its preprocessed variant. */
        @Override
        public void addInstitutionName(String institutionName) {
            creator().addField(this.solrDocument, SolrFields.INSTITUTION_NAME, institutionName);
            addPreprocessed(SolrFields.INSTITUTION_NAME, preprocess(institutionName));
        }

        /**
         * Copies the values of every listed field that is present in the document into the suggest field.
         *
         * @param suggestionFields names of the fields whose values feed the suggest field
         */
        @Override
        public void addSuggestions(List<String> suggestionFields) {
            // Gather first, write afterwards: the document must not change while its field names are iterated.
            List<Object> collected = new ArrayList<Object>();
            for (String fieldName : this.solrDocument.getFieldNames()) {
                if (!suggestionFields.contains(fieldName)) {
                    continue;
                }
                collected.addAll(this.solrDocument.getFieldValues(fieldName));
            }
            for (Object suggestion : collected) {
                creator().addField(this.solrDocument, SolrFields.SUGGEST_FIELD, (String) suggestion);
            }
        }

        /** Passes the sort field on to the document creator. */
        @Override
        public void addSortField(String name, String value) {
            creator().addSortField(this.solrDocument, name, value);
        }

    }

    /**
     * Creates an indexer that works against the given Solr server and starts out with a default
     * {@link SolrDocumentCreator}.
     *
     * @param solrServer the Solr client, injected under the name {@code search.indexer}
     */
    @Inject
    public IndexerImpl(@Named("search.indexer") SolrServer solrServer) {
        this.documentCreator = new SolrDocumentCreator();
        this.solrServer = solrServer;
    }

    /**
     * Returns the document creator this indexer currently uses to fill Solr documents.
     *
     * @return the current document creator
     */
    public SolrDocumentCreator getDocumentCreator() {
        return this.documentCreator;
    }

    /**
     * Replaces the document creator. Indexer documents handed out earlier look the creator up on the
     * indexer for every call, so they use the new creator from then on as well.
     *
     * @param documentCreator the creator to use from now on
     */
    public void setDocumentCreator(SolrDocumentCreator documentCreator) {
        this.documentCreator = documentCreator;
    }

    /**
     * Creates a new, empty indexer document for the given id. Despite the method name this implementation
     * never looks up an existing document; every call yields a fresh instance.
     *
     * @param id the id of the document
     * @return a new document that carries the id
     */
    @Override
    public IIndexerDocument createOrGetIndexerDocument(String id) {
        final IIndexerDocument document = new Document(id);
        return document;
    }

    /**
     * Commits the Solr index and hands the elapsed time to the stop watch together with a fixed message.
     * The stop watch is not stopped when the commit fails.
     *
     * @throws IndexerException wrapping any Solr or I/O failure
     */
    @Override
    public void forceCommit() {
        try {
            final StopWatch timer = StopWatch.start();
            this.solrServer.commit();
            timer.stop("Time used for committing search index");
        } catch (IOException ioFailure) {
            throw new IndexerException(ioFailure);
        } catch (SolrServerException solrFailure) {
            throw new IndexerException(solrFailure);
        }
    }

    /**
     * Removes markup tags from the input, normalizes whitespace and shortens the result to at most
     * {@code maxlength} characters.
     * <p>
     * Processing steps: everything between {@code <} and the next {@code >} is dropped (also across line
     * breaks), the text is trimmed, every whitespace character becomes a blank and runs of blanks are
     * collapsed to one. Text that is longer than {@code maxlength} is cut; for limits above 12 the last three
     * characters of the result are an ellipsis ({@code "..."}). Text that already fits is returned unchanged.
     *
     * @param input the raw text, possibly containing markup; {@code null} is treated as empty
     * @param maxlength the maximum length of the returned string
     * @return the cleaned and pruned text, never {@code null}
     */
    private static String stripHtmlAndPrune(final String input, final int maxlength) {
        if (input == null) {
            return "";
        }
        // [^>]* also matches line terminators, so tags that span several lines are removed as well
        String text = input.replaceAll("<[^>]*>", "").trim();

        // Pre-cut generously so that the whitespace handling below does not run over huge inputs
        final int secureSize = maxlength * 2;
        if (text.length() > secureSize) {
            text = text.substring(0, secureSize);
        }

        final char[] chars = text.toCharArray();
        for (int i = 0; i < chars.length; i++) {
            if (chars[i] != ' ' && Character.isWhitespace(chars[i])) {
                chars[i] = ' ';
            }
        }
        text = new String(chars).replaceAll(" {2,}", " ");

        if (text.length() <= maxlength) {
            // fits, including the case of exactly maxlength characters
            return text;
        }
        if (maxlength > 12) {
            return text.substring(0, maxlength - 3) + "...";
        }
        // very small limits leave no room for an ellipsis
        return text.substring(0, maxlength);
    }

    /**
     * Deletes the document with the given id by sending a delete-by-query request on the id field.
     *
     * @param id the id of the document to delete
     * @throws IndexerException wrapping any Solr or I/O failure
     */
    @Override
    public void deleteById(String id) {
        // NOTE(review): the id is put into the query verbatim; query syntax characters are not escaped here
        final String query = SolrFields.ID + ":" + id;
        try {
            this.solrServer.deleteByQuery(query);
        } catch (IOException ioFailure) {
            throw new IndexerException("Could not delete document " + id + ".", ioFailure);
        } catch (SolrServerException solrFailure) {
            throw new IndexerException("Could not delete document " + id + ".", solrFailure);
        }
    }

    /**
     * Deletes the documents with the given ids in batches of at most {@link #DELETE_BATCH_SIZE} ids per
     * request. An empty list results in no request at all.
     *
     * @param ids the ids of the documents to delete
     * @return the list that was passed in
     * @throws IndexerException if one of the delete requests fails; earlier batches have been sent by then
     */
    @Override
    public List<String> deleteByIds(List<String> ids) {
        // partition() yields no chunk for an empty list, so no query with an empty id group is ever built
        for (List<String> batch : Lists.partition(ids, (int) DELETE_BATCH_SIZE)) {
            deleteListOfItems(batch);
        }
        return ids;
    }

    /**
     * Deletes all documents whose id is contained in the given list with one delete-by-query request of the
     * form {@code <idField>:(id1 OR id2 OR ...)}. Does nothing when the list is null or empty.
     *
     * @param itemIds the ids to delete in one request
     * @throws IndexerException wrapping any Solr or I/O failure
     */
    private void deleteListOfItems(List<String> itemIds) {
        if (itemIds == null || itemIds.isEmpty()) {
            // an empty list would produce a query with an empty "()" group, which is not a usable query
            return;
        }

        StringBuilder query = new StringBuilder(SolrFields.ID).append(":").append("(");
        String separator = "";
        for (String itemId : itemIds) {
            query.append(separator).append(itemId);
            separator = " OR ";
        }
        query.append(")");

        try {
            this.solrServer.deleteByQuery(query.toString());
        } catch (SolrServerException e) {
            throw new IndexerException("Could not delete documents.", e);
        } catch (IOException e) {
            throw new IndexerException("Could not delete documents.", e);
        }
    }

    /**
     * Deletes all documents that carry both the given provider id and the given ingest id.
     *
     * @param providerId the provider id to match
     * @param ingestId the ingest id to match
     * @throws IndexerException wrapping any Solr or I/O failure
     */
    @Override
    public void deleteByIngestId(String providerId, String ingestId) {
        // Both values go into the query verbatim, joined by AND
        final StringBuilder query = new StringBuilder();
        query.append(SolrFields.PROVIDER_ID).append(":").append(providerId);
        query.append(" AND ");
        query.append(SolrFields.INGEST_ID).append(":").append(ingestId);

        try {
            this.solrServer.deleteByQuery(query.toString());
        } catch (IOException ioFailure) {
            throw new IndexerException("Could not delete for ingest " + ingestId + ".", ioFailure);
        } catch (SolrServerException solrFailure) {
            throw new IndexerException("Could not delete for ingest " + ingestId + ".", solrFailure);
        }
    }

    /**
     * Deletes all documents that carry the given provider id.
     *
     * @param id the provider id to match
     * @throws IndexerException wrapping any Solr or I/O failure
     */
    @Override
    public void deleteByProviderId(String id) {
        final String providerQuery = SolrFields.PROVIDER_ID + ":" + id;
        try {
            this.solrServer.deleteByQuery(providerQuery);
        } catch (IOException ioFailure) {
            throw new IndexerException("Could not delete for provider " + id + ".", ioFailure);
        } catch (SolrServerException solrFailure) {
            throw new IndexerException("Could not delete for provider " + id + ".", solrFailure);
        }
    }
}