Java tutorial
package nl.knaw.huygens.facetedsearch; /* * #%L * elab4-common * ======= * Copyright (C) 2013 - 2016 Huygens ING * ======= * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this program. If not, see * <http://www.gnu.org/licenses/gpl-3.0.html>. * #L% */ import java.io.IOException; import java.util.Collection; import java.util.Collections; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.lang.StringUtils; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery.ORDER; import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.response.FacetField; import org.apache.solr.client.solrj.response.FacetField.Count; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.params.HighlightParams; import com.google.common.collect.HashMultiset; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Multiset; import com.google.common.collect.Sets; import 
nl.knaw.huygens.Log;
import nl.knaw.huygens.solr.FacetCount;
import nl.knaw.huygens.solr.FacetInfo;
import nl.knaw.huygens.solr.FacetType;
import nl.knaw.huygens.solr.RangeOption;

/**
 * Base implementation of {@link SolrServerWrapper} that delegates index maintenance and
 * faceted search to a SolrJ {@link SolrServer}.
 *
 * <p>The {@link #server} field is never assigned in this class; presumably subclasses assign
 * it in {@link #setServer()} before any other method is used (NOTE(review): confirm against
 * the concrete subclasses).
 *
 * <p>NOTE(review): the index operations wrap failures in an {@link IndexException} built from
 * the message only, so the original cause and stack trace are lost. If {@code IndexException}
 * offers a constructor that accepts a cause, prefer it.
 */
public abstract class AbstractSolrServer implements SolrServerWrapper {
  /** Key in the search result map under which the total number of hits is stored. */
  public static final String KEY_NUMFOUND = "numFound";

  private static final int HIGHLIGHT_FRAGSIZE = 100;
  private static final int HIGHLIGHT_SNIPPETS = 500;
  private static final int ROWS = 50000;
  private static final int FACET_LIMIT = 10000;

  protected SolrServer server;
  private final QueryComposer queryComposer;

  /**
   * @param queryComposer composer used by {@link #search} to build the search query and the
   *          highlight query from the search parameters
   */
  public AbstractSolrServer(QueryComposer queryComposer) {
    this.queryComposer = queryComposer;
  }

  /** Hook for subclasses; see the class comment regarding {@link #server}. */
  public abstract void setServer();

  /**
   * Empties the index by deleting every document ({@code *:*}).
   *
   * @throws IndexException if the delete request fails
   */
  @Override
  public void initialize() throws IndexException {
    try {
      server.deleteByQuery("*:*");
    } catch (Exception e) {
      throw new IndexException(e.getMessage());
    }
  }

  /**
   * Asks the Solr server to optimize the index.
   *
   * @throws IndexException if the optimize request fails
   */
  @Override
  public void optimize() throws IndexException {
    try {
      server.optimize();
    } catch (Exception e) {
      throw new IndexException(e.getMessage());
    }
  }

  /**
   * Commits pending changes and then optimizes the index.
   *
   * @throws IndexException if the commit or the optimize request fails
   */
  @Override
  public void shutdown() throws IndexException {
    try {
      server.commit();
      server.optimize();
    } catch (Exception e) {
      throw new IndexException(e.getMessage());
    }
  }

  /**
   * Pings the Solr server.
   *
   * @return {@code true} if the ping response has status 0; {@code false} if the status
   *         differs or the ping throws (the failure message is logged)
   */
  @Override
  public boolean ping() {
    try {
      return server.ping().getStatus() == 0;
    } catch (Exception e) {
      Log.error("ping failed with '{}'", e.getMessage());
      return false;
    }
  }

  /** Deletes the document with the given id from the index. */
  @Override
  public void delete(String id) throws SolrServerException, IOException {
    server.deleteById(id);
  }

  /**
   * Adds a single document to the index.
   *
   * @throws IndexException if the add request fails
   */
  @Override
  public void add(SolrInputDocument doc) throws IndexException {
    try {
      server.add(doc);
    } catch (Exception e) {
      throw new IndexException(e.getMessage());
    }
  }

  /**
   * Adds a collection of documents to the index.
   *
   * @throws IndexException if the add request fails
   */
  @Override
  public void add(Collection<SolrInputDocument> docs) throws IndexException {
    try {
      server.add(docs);
    } catch (Exception e) {
      throw new IndexException(e.getMessage());
    }
  }

  /**
   * Runs a faceted search and returns the result as a map with the keys {@code "term"},
   * {@link #KEY_NUMFOUND}, {@code "ids"}, {@code "results"}, {@code "occurrences"},
   * {@code "facets"} and {@code "solrquery"}.
   *
   * @param fsp the search parameters; cast to {@link ElaborateSearchParameters}, so any other
   *          implementation results in a {@link ClassCastException}
   * @throws IndexException if the Solr query fails
   */
  @Override
  public Map<String, Object> search(FacetedSearchParameters<?> fsp) throws IndexException {
    ElaborateSearchParameters sp = (ElaborateSearchParameters) fsp;
    Log.info("searchparameters={}", sp);
    queryComposer.compose(sp);
    String queryString = queryComposer.getSearchQuery();
    String[] facetFields = getFacetFields(sp);
    Map<String, String> textFieldMap = sp.getTextFieldsToSearch();
    String[] fieldsToReturn = getIndexFieldToReturn(sp.getResultFields());

    SolrQuery query = new SolrQuery();
    query.setQuery(queryString)
        .setFields(fieldsToReturn)
        .setRows(ROWS)
        .addFacetField(facetFields)
        .setFacetMinCount(1)
        .setFacetLimit(FACET_LIMIT);

    if (queryComposer.mustHighlight()) {
      query.setHighlight(true)
          .setHighlightSnippets(HIGHLIGHT_SNIPPETS)
          .setHighlightFragsize(HIGHLIGHT_FRAGSIZE);
      query.set(HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, false);
      query.set(HighlightParams.MAX_CHARS, -1);
      query.set(HighlightParams.FIELDS, textFieldMap.keySet().toArray(new String[textFieldMap.size()]));
      query.set(HighlightParams.Q, queryComposer.getHighlightQuery());
    }

    query = setSort(query, sp);
    return getSearchData(sp, facetFields, query, fieldsToReturn);
  }

  /** Returns the requested facet fields followed by the lower and upper field of every range. */
  private String[] getFacetFields(ElaborateSearchParameters sp) {
    List<String> facetFieldList = Lists.newArrayList(sp.getFacetFields());
    for (RangeField rangeField : sp.getRanges()) {
      facetFieldList.add(rangeField.lowerField);
      facetFieldList.add(rangeField.upperField);
    }
    return facetFieldList.toArray(new String[facetFieldList.size()]);
  }

  /**
   * Executes the query and converts the response into the result map described at
   * {@link #search}.
   *
   * @throws IndexException if the Solr server reports a {@link SolrServerException}
   */
  private Map<String, Object> getSearchData(ElaborateSearchParameters sp, String[] facetFields, SolrQuery query, String[] fieldsToReturn) throws IndexException {
    Map<String, Object> data = Maps.newHashMap();
    data.put("term", query.getQuery());
    try {
      Log.info("query=\n{}", query);
      QueryResponse response = server.query(query);
      Log.debug("response='{}'", response);

      SolrDocumentList documents = response.getResults();
      data.put(KEY_NUMFOUND, documents.getNumFound());

      Map<String, Map<String, List<String>>> highlighting = response.getHighlighting();
      List<String> ids = Lists.newArrayList();
      List<Map<String, Object>> results = Lists.newArrayList();
      int occurrences = 0;
      for (SolrDocument document : documents) {
        String docId = document.getFieldValue(SolrFields.DOC_ID).toString();
        ids.add(docId);

        Map<String, List<String>> kwicMap = highlightingFor(highlighting, docId);
        Map<String, Object> result = entryView(document, fieldsToReturn, kwicMap, sp.getTextFieldsToSearch());
        results.add(result);

        // entryView always stores a Map<String, Integer> under "terms".
        @SuppressWarnings("unchecked")
        Map<String, Integer> termCounts = (Map<String, Integer>) result.get("terms");
        for (Integer count : termCounts.values()) {
          occurrences += count;
        }
      }
      data.put("ids", ids);
      data.put("results", results);
      data.put("occurrences", occurrences);
      data.put("facets", getFacetCountList(sp, facetFields, response));

    } catch (SolrServerException e) {
      Log.error(e.getMessage());
      throw new IndexException(e.getMessage());
    }
    data.put("solrquery", query.toString());
    return data;
  }

  /**
   * Returns the highlight snippets of one document, keyed by field name; never {@code null}.
   * An empty map is returned when the response carries no highlighting at all or has no
   * entry for this document.
   */
  private static Map<String, List<String>> highlightingFor(Map<String, Map<String, List<String>>> highlighting, String docId) {
    Map<String, List<String>> snippetsByField = (highlighting == null) ? null : highlighting.get(docId);
    if (snippetsByField == null) {
      return ImmutableMap.<String, List<String>> of();
    }
    return snippetsByField;
  }

  /**
   * Builds the facet list for the response: one entry per known facet field that has at
   * least one option, followed by one {@link FacetType#RANGE} entry per range that has values.
   */
  private List<FacetCount> getFacetCountList(ElaborateSearchParameters sp, String[] facetFields, QueryResponse response) {
    List<FacetCount> facets = Lists.newArrayList();
    for (String facetField : facetFields) {
      FacetInfo facetInfo = sp.getFacetInfoMap().get(facetField);
      if (facetInfo == null) {
        continue;
      }
      FacetCount facetCount = convertFacet(response.getFacetField(facetField), facetInfo.getTitle(), facetInfo.getType());
      // convertFacet returns null when the response does not contain this facet field.
      if (facetCount != null && !facetCount.getOptions().isEmpty()) {
        facets.add(facetCount);
      }
    }

    for (Entry<String, Range> entry : getRangeMap(sp, response).entrySet()) {
      String name = entry.getKey();
      Range range = entry.getValue();
      RangeOption option = new RangeOption().setLowerLimit(range.lowest).setUpperLimit(range.highest);
      FacetCount fc = new FacetCount()
          .setName(name + "_range")
          .setTitle(name + " range")
          .setType(FacetType.RANGE)
          .addOption(option);
      facets.add(fc);
    }
    return facets;
  }

  /**
   * Determines, per range field, the lowest and highest value occurring in the facet counts
   * of its lower and upper field. Ranges without any value are left out of the map.
   *
   * @throws NumberFormatException if a facet value of a range field is not an integer
   */
  private Map<String, Range> getRangeMap(ElaborateSearchParameters sp, QueryResponse response) {
    Map<String, Range> map = Maps.newHashMap();
    for (RangeField rangeField : sp.getRanges()) {
      Set<Integer> values = Sets.newHashSet();
      for (String facetFieldName : ImmutableList.of(rangeField.lowerField, rangeField.upperField)) {
        FacetField facetField = response.getFacetField(facetFieldName);
        // Both the facet field and its value list may be absent from the response.
        List<Count> counts = (facetField == null) ? null : facetField.getValues();
        if (counts == null) {
          continue;
        }
        for (Count count : counts) {
          values.add(Integer.valueOf(count.getName()));
        }
      }
      if (!values.isEmpty()) {
        map.put(rangeField.name, new Range(Collections.min(values), Collections.max(values)));
      }
    }
    return map;
  }

  /**
   * Builds the view of a single hit: the requested fields, the trimmed highlight snippets
   * under {@code "_kwic"} (keyed by field title) and the highlighted term counts under
   * {@code "terms"}.
   *
   * @param kwicMap highlight snippets of this document, keyed by field name; must not be null
   * @param fieldMap maps field names to the titles used as keys in the {@code "_kwic"} map
   */
  private Map<String, Object> entryView(SolrDocument document, String[] fieldsToReturn, Map<String, List<String>> kwicMap, Map<String, String> fieldMap) {
    Map<String, Object> view = Maps.newHashMap();
    for (String field : fieldsToReturn) {
      if (field.startsWith(SolrUtils.METADATAFIELD_PREFIX)) {
        // Fields with the metadata prefix are returned with all their values.
        view.put(field, document.getFieldValues(field));
      } else {
        view.put(field, document.getFieldValue(field));
      }
    }

    Multiset<String> terms = HashMultiset.create();
    Map<String, Object> newKwicMap = Maps.newLinkedHashMap();
    for (Entry<String, List<String>> entry : kwicMap.entrySet()) {
      List<String> raw = entry.getValue();
      List<String> snippets = Lists.newArrayListWithCapacity(raw.size());
      for (String snippet : raw) {
        snippets.add(snippet.trim());
      }
      terms.addAll(extractTerms(snippets));
      String fieldTitle = fieldMap.get(entry.getKey());
      newKwicMap.put(fieldTitle, snippets);
    }

    view.put("terms", getTermCountMap(terms));
    view.put("_kwic", newKwicMap);
    return view;
  }

  /** Converts a multiset of terms into a map from each distinct term to its number of occurrences. */
  private Map<String, Integer> getTermCountMap(Multiset<String> terms) {
    Map<String, Integer> termCountMap = Maps.newHashMap();
    for (String term : terms.elementSet()) {
      termCountMap.put(term, terms.count(term));
    }
    return termCountMap;
  }

  /**
   * Returns the index fields to request from Solr: the document id, the name, and the facet
   * name of every given result field.
   */
  private String[] getIndexFieldToReturn(Collection<String> collection) {
    List<String> list = Lists.newArrayList(SolrFields.DOC_ID, SolrFields.NAME);
    for (String level : collection) {
      list.add(SolrUtils.facetName(level));
    }
    return list.toArray(new String[list.size()]);
  }

  /**
   * Adds the sort criteria to the query: first the explicit sort parameters that have a
   * non-blank field name, then the three level fields and the name field (duplicates
   * removed, ascending).
   *
   * @return the given query, with the sort criteria added
   */
  private SolrQuery setSort(SolrQuery query, ElaborateSearchParameters sp) {
    for (SortParameter sortParameter : sp.getSortParameters()) {
      if (StringUtils.isNotBlank(sortParameter.getFieldname())) {
        String facetName = SolrUtils.facetName(sortParameter.getFieldname());
        query.addSort(facetName, solrOrder(sortParameter.getDirection()));
      }
    }

    LinkedHashSet<String> levelFields = Sets.newLinkedHashSet(
        ImmutableList.of(sp.getLevel1Field(), sp.getLevel2Field(), sp.getLevel3Field(), SolrFields.NAME));
    for (String sortField : levelFields) {
      query.addSort(sortField, SolrQuery.ORDER.asc);
    }
    return query;
  }

  /** Maps {@code "asc"} to ascending order; any other value (including null) to descending. */
  private ORDER solrOrder(String direction) {
    return "asc".equals(direction) ? SolrQuery.ORDER.asc : SolrQuery.ORDER.desc;
  }

  /**
   * Converts a Solr facet field into a {@link FacetCount} with one option per facet value.
   *
   * @param field the facet field from the response; may be {@code null}
   * @param title the title to set on the result
   * @param type the facet type to set on the result
   * @return the converted facet, or {@code null} if {@code field} is {@code null}
   */
  protected FacetCount convertFacet(FacetField field, String title, FacetType type) {
    if (field == null) {
      return null;
    }
    FacetCount facetCount = new FacetCount()
        .setName(field.getName())
        .setTitle(title)
        .setType(type);
    List<Count> counts = field.getValues();
    if (counts != null) {
      for (Count count : counts) {
        FacetCount.Option option = new FacetCount.Option()
            .setName(count.getName())
            .setCount(count.getCount());
        facetCount.addOption(option);
      }
    }
    return facetCount;
  }

  /** Marker that precedes a highlighted term in a snippet. */
  public static final String HL_PRE = "<em>";
  /** Marker that follows a highlighted term in a snippet. */
  public static final String HL_POST = "</em>";
  private static final Pattern HL_REGEX = Pattern.compile(HL_PRE + "(.+?)" + HL_POST);

  /**
   * Extracts every term enclosed in {@link #HL_PRE} and {@link #HL_POST} from the snippets.
   *
   * @param snippets highlight snippets
   * @return the highlighted terms in order of appearance, duplicates included, lower-cased
   *         independently of the platform locale so the keys are stable across environments
   */
  public static Collection<String> extractTerms(List<String> snippets) {
    Collection<String> terms = Lists.newArrayList();
    for (String snippet : snippets) {
      Matcher matcher = HL_REGEX.matcher(snippet);
      while (matcher.find()) {
        terms.add(matcher.group(1).toLowerCase(java.util.Locale.ROOT));
      }
    }
    return terms;
  }

  /** Mutable pair holding the lowest and the highest value of a range. */
  public static class Range {
    public int lowest;
    public int highest;

    public Range(int lowest, int highest) {
      this.lowest = lowest;
      this.highest = highest;
    }

    /** Widens this range so that it also covers {@code other}. */
    public void combineWith(Range other) {
      this.lowest = Math.min(this.lowest, other.lowest);
      this.highest = Math.max(this.highest, other.highest);
    }
  }
}