Java tutorial
package de.fhg.iais.cortex.search; /****************************************************************************** * Copyright 2011 (c) Fraunhofer IAIS Netmedia http://www.iais.fraunhofer.de * * ************************************************************************** * * Licensed under the Apache License, Version 2.0 (the "License"); you may * * not use this file except in compliance with the License. * * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * * software distributed under the License is distributed on an "AS IS" BASIS, * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * * See the License for the specific language governing permissions and * * limitations under the License. * ******************************************************************************/ import java.io.IOException; import java.util.ArrayList; import java.util.List; import javax.inject.Inject; import javax.inject.Named; import org.apache.commons.lang.StringUtils; import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.common.SolrInputDocument; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.collect.Lists; import de.fhg.iais.commons.annotation.UsedBy; import de.fhg.iais.commons.time.StopWatch; import de.fhg.iais.cortex.model.aip.AipDomType; import de.fhg.iais.cortex.search.exception.IndexerException; import de.fhg.iais.cortex.search.utils.FieldPreprocessor; import de.fhg.iais.cortex.search.utils.SolrDocumentCreator; @UsedBy("guice") public class IndexerImpl implements IIndexer { private final Logger LOG = LoggerFactory.getLogger(IndexerImpl.class); public static final long DELETE_BATCH_SIZE = 1000; private final SolrServer solrServer; private SolrDocumentCreator documentCreator; private final int maxSortFieldLength = 5000; private class Document implements IIndexerDocument { private final FieldPreprocessor preprocessor = new FieldPreprocessor(); private final FieldPreprocessor facet_preprocessor = new FieldPreprocessor( FieldPreprocessor.REPLACE_ON_TWO_SYMBOLS_PATTERN, FieldPreprocessor.REPLACE_ON_SYMBOL_AT_START_PATTERN, FieldPreprocessor.REPLACE_ON_SYMBOL_AT_END_PATTERN, FieldPreprocessor.REPLACE_ON_SYMBOL_WITH_WORDCHAR_PATTERN, FieldPreprocessor.REPLACE_ON_WORDCHAR_WITH_SYMBOL_PATTERN, FieldPreprocessor.REPLACE_ON_DOT_SYMBOL_PATTERN, FieldPreprocessor.REPLACE_ON_ALL_SYMBOL_WITH_MINUS_SYMBOL_PATTERN, FieldPreprocessor.REPLACE_ON_MINUS_SYMBOL_WITH_ALL_SYMBOL_PATTERN, FieldPreprocessor.REPLACE_ON_MINUS_SYMBOL_AT_START_PATTERN); private final SolrInputDocument solrDocument; public Document(String id) { this.solrDocument = new SolrInputDocument(); IndexerImpl.this.documentCreator.setId(this.solrDocument, id); } private String preprocessFacet(String text) { return this.facet_preprocessor.preprocess(text, "\\s"); } private String preprocess(String text) { return this.preprocessor.preprocess(text, "\\s"); } @Override public void addPreview(String preview) { IndexerImpl.this.documentCreator.addPreview(this.solrDocument, preview); IndexerImpl.this.documentCreator.addPreprocessedField(this.solrDocument, SolrFields.PREVIEW, preprocess(preview)); } @Override public void addAggregation(String preview) { String stripped = stripHtmlAndPrune(preview, 1024); IndexerImpl.this.documentCreator.addAggregationField(this.solrDocument, stripped); IndexerImpl.this.documentCreator.addPreprocessedField(this.solrDocument, SolrFields.AGGREGGATION, preprocess(stripped)); } @Override public void addView(String view) { IndexerImpl.this.documentCreator.addView(this.solrDocument, view); IndexerImpl.this.documentCreator.addPreprocessedField(this.solrDocument, SolrFields.VIEW, preprocess(view)); } @Override public void addFulltext(String fieldName, String text) { IndexerImpl.this.documentCreator.addField(this.solrDocument, fieldName, text); IndexerImpl.this.documentCreator.addPreprocessedField(this.solrDocument, fieldName, preprocess(text)); } @Override public void addFacetField(String field, String value) { IndexerImpl.this.documentCreator.addFacetField(this.solrDocument, field, value); IndexerImpl.this.documentCreator.addPreprocessedField(this.solrDocument, field, preprocessFacet(value)); } @Override public void addLabel(String itemLabel) { IndexerImpl.this.documentCreator.addLabel(this.solrDocument, itemLabel); IndexerImpl.this.documentCreator.addPreprocessedField(this.solrDocument, SolrFields.LABEL, preprocess(itemLabel)); } @Override public void addPreprocessedField(String name, String value) { IndexerImpl.this.documentCreator.addPreprocessedField(this.solrDocument, name, this.preprocessor.preprocess(value, "\\s")); } @Override public void addGeocode(String latitude, String longitude) { IndexerImpl.this.documentCreator.addGeocode(this.solrDocument, latitude, longitude); } @Override public void addLocationDisplayName(String locationDisplayName) { IndexerImpl.this.documentCreator.addLocationDisplayName(this.solrDocument, locationDisplayName); } @Override public void addProviderId(String providerId) { if (!StringUtils.isEmpty(providerId)) { IndexerImpl.this.documentCreator.addField(this.solrDocument, SolrFields.PROVIDER_ID, providerId); } } @Override public void addRevisionId(String revisionId) { if (!StringUtils.isEmpty(revisionId)) { IndexerImpl.this.documentCreator.addField(this.solrDocument, SolrFields.REVISION_ID, revisionId); } } @Override public void addIngestId(String ingestId) { IndexerImpl.this.documentCreator.addField(this.solrDocument, SolrFields.INGEST_ID, ingestId); } @Override public void addToIndex() { try { IndexerImpl.this.solrServer.add(this.solrDocument); } catch (SolrServerException e) { throw new IndexerException(e); } catch (IOException e) { throw new IndexerException(e); } } @Override public void storePreview(String preview, String title) { IndexerImpl.this.documentCreator.addField(this.solrDocument, SolrFields.PREVIEW_STORE, preview); IndexerImpl.this.documentCreator.addField(this.solrDocument, SolrFields.SORT, title.substring(0, Math.min(IndexerImpl.this.maxSortFieldLength, title.length()))); } @Override public void setCategory(String category) { if (AipDomType.Institution.name().equals(category)) { IndexerImpl.this.documentCreator.setDocumentBoost(this.solrDocument, 0.1f); } IndexerImpl.this.documentCreator.addField(this.solrDocument, SolrFields.CATEGORY, category); } @Override public void addInstitutionName(String institutionName) { IndexerImpl.this.documentCreator.addField(this.solrDocument, SolrFields.INSTITUTION_NAME, institutionName); IndexerImpl.this.documentCreator.addPreprocessedField(this.solrDocument, SolrFields.INSTITUTION_NAME, preprocess(institutionName)); } @Override public void addSuggestions(List<String> suggestionFields) { List<Object> values = new ArrayList<Object>(); for (String fieldName : this.solrDocument.getFieldNames()) { if (suggestionFields.contains(fieldName)) { values.addAll(this.solrDocument.getFieldValues(fieldName)); } } for (Object value : values) { IndexerImpl.this.documentCreator.addField(this.solrDocument, SolrFields.SUGGEST_FIELD, (String) value); } } @Override public void addSortField(String name, String value) { IndexerImpl.this.documentCreator.addSortField(this.solrDocument, name, value); } } @Inject public IndexerImpl(@Named("search.indexer") SolrServer solrServer) { this.solrServer = solrServer; this.documentCreator = new SolrDocumentCreator(); } public SolrDocumentCreator getDocumentCreator() { return this.documentCreator; } public void setDocumentCreator(SolrDocumentCreator documentCreator) { this.documentCreator = documentCreator; } @Override public IIndexerDocument createOrGetIndexerDocument(String id) { return new Document(id); } @Override public void forceCommit() { try { StopWatch watch = StopWatch.start(); this.solrServer.commit(); watch.stop("Time used for committing search index"); } catch (SolrServerException e) { throw new IndexerException(e); } catch (IOException e) { throw new IndexerException(e); } } private static String stripHtmlAndPrune(final String input, final int maxlength) { String label = StringUtils.trimToEmpty(input.replaceAll("\\<.*?>", "")); final int securesize = maxlength * 2; if (label.length() > securesize) { //to avoid useless computation in next step label = label.substring(0, securesize); } char[] chars = label.toCharArray(); for (int i = 0; i < chars.length; i++) { char c = chars[i]; if (Character.isWhitespace(c) && (' ' != c)) { chars[i] = ' '; } } label = new String(chars).replaceAll(" {2,}", " "); final int finalsize = label.length(); if (finalsize >= maxlength) { if (maxlength > 12) { return label.substring(0, maxlength - 3) + "..."; } else { return label.substring(0, maxlength); } } else { return label; } } @Override public void deleteById(String id) { try { this.solrServer.deleteByQuery(SolrFields.ID + ":" + id); } catch (SolrServerException e) { throw new IndexerException("Could not delete document " + id + ".", e); } catch (IOException e) { throw new IndexerException("Could not delete document " + id + ".", e); } } @Override public List<String> deleteByIds(List<String> ids) { List<String> subList = Lists.newArrayList(); for (int i = 0; i < ids.size(); i++) { if (i % DELETE_BATCH_SIZE == 0 && !subList.isEmpty()) { deleteListOfItems(subList); subList.clear(); } subList.add(ids.get(i)); } deleteListOfItems(subList); return ids; } private void deleteListOfItems(List<String> itemIds) { StringBuilder sb = new StringBuilder(SolrFields.ID).append(":").append("("); for (int i = 0; i < itemIds.size(); i++) { if (i == 0) { sb.append(itemIds.get(i)); } else { sb.append(" OR ").append(itemIds.get(i)); } } sb.append(")"); try { this.solrServer.deleteByQuery(sb.toString()); } catch (SolrServerException e) { throw new IndexerException("Could not delete documents.", e); } catch (IOException e) { throw new IndexerException("Could not delete documents.", e); } } @Override public void deleteByIngestId(String providerId, String ingestId) { try { String deleteQuery = SolrFields.PROVIDER_ID + ":" + providerId + " AND " + SolrFields.INGEST_ID + ":" + ingestId; this.solrServer.deleteByQuery(deleteQuery); } catch (SolrServerException e) { throw new IndexerException("Could not delete for ingest " + ingestId + ".", e); } catch (IOException e) { throw new IndexerException("Could not delete for ingest " + ingestId + ".", e); } } @Override public void deleteByProviderId(String id) { try { String deleteQuery = SolrFields.PROVIDER_ID + ":" + id; this.solrServer.deleteByQuery(deleteQuery); } catch (SolrServerException e) { throw new IndexerException("Could not delete for provider " + id + ".", e); } catch (IOException e) { throw new IndexerException("Could not delete for provider " + id + ".", e); } } }