Java tutorial
package it.cnr.isti.hpc.dexter.lucene;

/**
 * Copyright 2012 Salvatore Trani
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import it.cnr.isti.hpc.dexter.entity.EntityMatch;
import it.cnr.isti.hpc.dexter.entity.EntityMatchList;
import it.cnr.isti.hpc.dexter.spot.SpotMatch;
import it.cnr.isti.hpc.dexter.spot.clean.SpotManager;
import it.cnr.isti.hpc.dexter.spot.cleanpipe.cleaner.QuotesCleaner;
import it.cnr.isti.hpc.dexter.spot.cleanpipe.cleaner.UnderscoreCleaner;
import it.cnr.isti.hpc.dexter.spot.cleanpipe.cleaner.UnicodeCleaner;
import it.cnr.isti.hpc.dexter.util.DexterParams;
import it.cnr.isti.hpc.log.ProgressLogger;
import it.cnr.isti.hpc.text.Text;
import it.cnr.isti.hpc.wikipedia.article.Article;
import it.cnr.isti.hpc.wikipedia.article.ArticleSummarizer;
import it.cnr.isti.hpc.wikipedia.article.Link;
import it.cnr.isti.hpc.wikipedia.article.Template;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInput;
import java.io.ObjectInputStream;
import java.io.ObjectOutput;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * LuceneHelper provides utilities for indexing, retrieving, and ranking
 * Wikipedia articles.
 *
 * @author Diego Ceccarelli <diego.ceccarelli@isti.cnr.it>
 *
 * Created on Aug 27, 2013
 */
public class LuceneHelper {

    protected static final String LUCENE_ARTICLE_DEFAULT_FIELD = "content";
    protected static final String LUCENE_ARTICLE_ID = "wiki-id";
    protected static final String LUCENE_ARTICLE_WIKI_TITLE = "wiki-title";
    protected static final String LUCENE_ARTICLE_TITLE = "title";
    protected static final String LUCENE_ARTICLE_TYPE = "type";
    protected static final String LUCENE_ARTICLE_LIST = "list";
    protected static final String LUCENE_ARTICLE_INFOBOX = "infobox";
    protected static final String LUCENE_ARTICLE_EMPH = "emph";
    protected static final String LUCENE_ARTICLE_SECTIONS = "sections";
    protected static final String LUCENE_ARTICLE_DESCRIPTIONS = "desc";
    protected static final String LUCENE_ARTICLE_LINKS = "link";
    protected static final String LUCENE_ARTICLE_CONTENT = "content";
    protected static final String LUCENE_ARTICLE_SUMMARY = "summary";

    /**
     * Logger for this class
     */
    private static final Logger logger = LoggerFactory
            .getLogger(LuceneHelper.class);

    /**
     * The Lucene analyzer
     */
    protected final StandardAnalyzer ANALYZER = new StandardAnalyzer(
            Version.LUCENE_41, CharArraySet.EMPTY_SET);

    /**
     * Singleton
     */
    protected static LuceneHelper dexterHelper;

    protected Directory index;
    protected IndexWriter writer;
    protected IndexSearcher searcher;
    protected final IndexWriterConfig config;
    protected final ArticleSummarizer summarizer;

    private static DexterParams params = DexterParams.getInstance();

    /**
     * number of documents indexed
     */
    protected final int collectionSize;

    protected static final FieldType STORE_TERM_VECTORS = new FieldType();
    protected static final FieldType STORE_TERM_VECTORS_NOT_STORED = new FieldType();

    static {
        STORE_TERM_VECTORS.setIndexed(true);
        STORE_TERM_VECTORS.setTokenized(true);
        STORE_TERM_VECTORS.setStored(true);
        STORE_TERM_VECTORS.setStoreTermVectors(true);
        STORE_TERM_VECTORS.freeze();

        STORE_TERM_VECTORS_NOT_STORED.setIndexed(true);
        STORE_TERM_VECTORS_NOT_STORED.setTokenized(true);
        STORE_TERM_VECTORS_NOT_STORED.setStored(false);
        STORE_TERM_VECTORS_NOT_STORED.setStoreTermVectors(true);
        STORE_TERM_VECTORS_NOT_STORED.freeze();
    }

    private static SpotManager cleaner = new SpotManager();

    protected final File wikiIdtToLuceneIdSerialization;
    protected static Map<Integer, Integer> wikiIdToLuceneId;

    static {
        cleaner.add(new UnicodeCleaner());
        cleaner.add(new UnderscoreCleaner());
        cleaner.add(new QuotesCleaner());
    }

    /**
     * Opens or creates a lucene index in the given directory
     *
     * @param wikiIdtToLuceneIdSerialization
     *            - the file containing the serialized mapping between wiki-id
     *            and Lucene document ids
     *
     * @param indexPath
     *            - the path of the directory with the Lucene index
     */
    protected LuceneHelper(File wikiIdtToLuceneIdSerialization, File indexPath) {
        logger.info("opening lucene index in folder {}", indexPath);
        config = new IndexWriterConfig(Version.LUCENE_41, ANALYZER);
        this.wikiIdtToLuceneIdSerialization = wikiIdtToLuceneIdSerialization;
        BooleanQuery.setMaxClauseCount(1000);

        try {
            index = FSDirectory.open(indexPath);
            // writer.commit();
        } catch (Exception e) {
            logger.error("opening the index: {}", e.toString());
            System.exit(1);
        }
        summarizer = new ArticleSummarizer();
        writer = getWriter();
        collectionSize = writer.numDocs();
        wikiIdToLuceneId = Collections.emptyMap();
    }

    /**
     * @return an index reader
     */
    protected IndexReader getReader() {
        IndexReader reader = null;
        try {
            reader = DirectoryReader.open(index);
        } catch (Exception e) {
            logger.error("reading the index: {} ", e.toString());
            System.exit(1);
        }
        return reader;
    }
    protected IndexSearcher getSearcher() {
        if (searcher != null)
            return searcher;
        IndexReader reader = getReader();
        searcher = new IndexSearcher(reader);
        return searcher;
    }

    /**
     * @return true if the dexter lucene index exists, false otherwise
     */
    public static boolean hasDexterLuceneIndex() {
        File luceneFolder = params.getIndexDir();
        return luceneFolder.exists();
    }

    /**
     * Returns an instance of the Dexter Lucene index.
     *
     * @return an instance of the Dexter Lucene index
     */
    public static LuceneHelper getDexterLuceneHelper() {
        if (dexterHelper == null) {
            File luceneFolder = params.getIndexDir();
            File serializedWikiFile = params.getWikiToIdFile();
            dexterHelper = new LuceneHelper(serializedWikiFile, luceneFolder);
        }
        return dexterHelper;
    }
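    // Usage sketch (illustrative only, not part of the original sources): how
    // client code typically obtains the singleton helper and queries it. It
    // assumes an already-built index and a configured DexterParams; the query
    // string below is hypothetical.
    //
    //   if (LuceneHelper.hasDexterLuceneIndex()) {
    //       LuceneHelper helper = LuceneHelper.getDexterLuceneHelper();
    //       List<Integer> wikiIds = helper.query("neural networks", "content", 10);
    //       int phraseFreq = helper.getFreq("neural networks");
    //   }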
    /**
     * Builds the map containing the conversion from the Wikipedia ids to the
     * Lucene ids, scanning the whole index.
     */
    protected void parseWikiIdToLuceneId() {
        logger.warn("no index wikiID -> lucene found - I'll generate");
        IndexReader reader = getReader();
        wikiIdToLuceneId = new HashMap<Integer, Integer>(reader.numDocs());
        ProgressLogger pl = new ProgressLogger(
                "creating wiki2lucene, read {} docs", 100000);
        int numDocs = reader.numDocs();
        for (int i = 0; i < numDocs; i++) {
            pl.up();
            try {
                Document doc = reader.document(i);
                IndexableField f = doc.getField(LUCENE_ARTICLE_ID);
                Integer wikiId = Integer.valueOf(f.stringValue());
                wikiIdToLuceneId.put(wikiId, i);
            } catch (CorruptIndexException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Dumps the map containing the conversion from the Wikipedia ids to the
     * Lucene ids.
     */
    protected void dumpWikiIdToLuceneId() {
        try {
            // serialize the map to a file
            ObjectOutput out = new ObjectOutputStream(new FileOutputStream(
                    wikiIdtToLuceneIdSerialization));
            out.writeObject(wikiIdToLuceneId);
            out.close();
        } catch (IOException e) {
            logger.error("dumping the wiki-id to lucene-id map to a file ({})",
                    e.toString());
            System.exit(-1);
        }
    }

    /**
     * Loads the map containing the conversion from the Wikipedia ids to the
     * Lucene ids, generating it if it does not exist yet.
     */
    @SuppressWarnings("unchecked")
    public void loadWikiIdToLuceneId() {
        if (!wikiIdtToLuceneIdSerialization.exists()) {
            logger.info("{} does not exist, generating",
                    wikiIdtToLuceneIdSerialization);
            parseWikiIdToLuceneId();
            logger.info("storing");
            dumpWikiIdToLuceneId();
            return;
        }
        logger.info("loading wiki id to lucene id");
        try {
            InputStream is = new BufferedInputStream(new FileInputStream(
                    wikiIdtToLuceneIdSerialization));
            @SuppressWarnings("resource")
            ObjectInput oi = new ObjectInputStream(is);
            wikiIdToLuceneId = (Map<Integer, Integer>) oi.readObject();
        } catch (Exception e) {
            logger.error("reading serialized object ({})", e.toString());
            System.exit(-1);
        }
        logger.info("done");
    }

    /**
     * @return the Lucene id of an article, given its wikiId, or -1 if the
     *         article is not in the index
     */
    protected int getLuceneId(int wikiId) {
        if (wikiIdToLuceneId.isEmpty()) {
            loadWikiIdToLuceneId();
        }
        if (!wikiIdToLuceneId.containsKey(wikiId))
            return -1;
        return wikiIdToLuceneId.get(wikiId);
    }

    /**
     * Returns the TF-IDF similarity between a given query and an article
     *
     * @param query
     *            - the query to compare with the article
     * @param wikiId
     *            - the id of the article to compare with the query
     * @return the TF-IDF similarity between the query and wikiId
     */
    public float getSimilarity(Query query, int wikiId) {
        searcher = getSearcher();
        int docId = getLuceneId(wikiId);
        Explanation e = null;
        try {
            e = searcher.explain(query, docId);
        } catch (IOException e1) {
            logger.error("getting similarity between text and doc {} ", wikiId);
            return 0;
        }
        return e.getValue();
    }

    /**
     * Returns the cosine similarity between two documents
     *
     * @param x
     *            - the wikiId of the first document
     * @param y
     *            - the wikiId of the second document
     *
     * @return a double between 0 (not similar) and 1 (same content),
     *         representing the similarity between the 2 documents
     */
    public double getCosineSimilarity(int x, int y) {
        return getCosineSimilarity(x, y, LUCENE_ARTICLE_DEFAULT_FIELD);
    }
    /**
     * Returns the cosine similarity between two documents
     *
     * @param x
     *            - the wikiId of the first document
     * @param y
     *            - the wikiId of the second document
     * @param field
     *            - the field on which to compute the similarity
     *
     * @return a double between 0 (not similar) and 1 (same content),
     *         representing the similarity between the 2 documents
     */
    public double getCosineSimilarity(int x, int y, String field) {
        IndexReader reader = getReader();
        Terms tfvX = null;
        Terms tfvY = null;
        try {
            tfvX = reader.getTermVector(getLuceneId(x), field);
            tfvY = reader.getTermVector(getLuceneId(y), field);
        } catch (IOException e) {
            logger.error("computing cosine similarity ({}) ", e.toString());
            System.exit(-1);
        }

        Map<String, Integer> xfrequencies = new HashMap<String, Integer>();
        Map<String, Integer> yfrequencies = new HashMap<String, Integer>();

        TermsEnum xtermsEnum = null;
        try {
            xtermsEnum = tfvX.iterator(null);
            BytesRef text;
            while ((text = xtermsEnum.next()) != null) {
                String term = text.utf8ToString();
                int freq = (int) xtermsEnum.totalTermFreq();
                xfrequencies.put(term, freq);
            }
            TermsEnum ytermsEnum = tfvY.iterator(null);
            while ((text = ytermsEnum.next()) != null) {
                String term = text.utf8ToString();
                int freq = (int) ytermsEnum.totalTermFreq();
                yfrequencies.put(term, freq);
            }
        } catch (IOException e) {
            logger.error("computing cosine similarity ({}) ", e.toString());
            System.exit(-1);
        }

        Map<String, Double> xTfidf = new HashMap<String, Double>();
        Map<String, Double> yTfidf = new HashMap<String, Double>();
        double xnorm = tfidfVector(xTfidf, xfrequencies, field);
        double ynorm = tfidfVector(yTfidf, yfrequencies, field);

        double dotproduct = 0;
        for (Map.Entry<String, Double> k : xTfidf.entrySet()) {
            if (yTfidf.containsKey(k.getKey())) {
                logger.info("key {}", k.getKey());
                logger.info("key x {} y {} ", k.getValue(),
                        yTfidf.get(k.getKey()));
                dotproduct += k.getValue() * yTfidf.get(k.getKey());
                logger.info("dotproduct {} ", dotproduct);
            }
        }
        return dotproduct / (xnorm * ynorm);
    }

    /**
     * Builds the TF-IDF vector and returns its L2 norm
     *
     * @param tfidf
     *            - the vector containing for each term its TF-IDF score; it
     *            will be populated by this method
     * @param freq
     *            - the vector containing for each term its frequency
     * @param field
     *            - the field on which to compute the inverse document
     *            frequency
     *
     * @return the norm of the TF-IDF vector
     */
    private double tfidfVector(Map<String, Double> tfidf,
            Map<String, Integer> freq, String field) {
        IndexReader reader = getReader();

        double norm = 0;
        for (Map.Entry<String, Integer> entry : freq.entrySet()) {
            Term t = new Term(field, entry.getKey());
            int df = 0;
            try {
                df = reader.docFreq(t);
            } catch (IOException e) {
                logger.error("computing tfidfVector ({}) ", e.toString());
                System.exit(-1);
            }
            double idf = Math.log(collectionSize / (double) df + 1)
                    / Math.log(2) + 1;
            double tfidfValue = entry.getValue() * idf;
            norm += tfidfValue * tfidfValue;
            tfidf.put(entry.getKey(), tfidfValue);
        }
        return Math.sqrt(norm);
    }
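    // Note on the weighting above (explanatory comment, not part of the
    // original sources): each term t of a document vector is weighted as
    //
    //   tfidf(t) = tf(t) * (log2(N / df(t) + 1) + 1)
    //
    // where tf(t) is the term frequency read from the stored term vector,
    // df(t) is the document frequency of t in the given field, and N is the
    // number of indexed documents (collectionSize). getCosineSimilarity then
    // returns dot(x, y) / (||x|| * ||y||) over the two weighted vectors, so
    // identical documents score 1.0 and documents sharing no terms score 0.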
    /**
     * Converts an article to a Lucene Document
     *
     * @param a
     *            - a Wikipedia Article to index
     * @return the Lucene Document representing the Wikipedia Article
     */
    private Document toLuceneDocument(Article a) {
        Document d = new Document();
        d.add(new TextField(LUCENE_ARTICLE_TITLE, a.getTitle(), Field.Store.YES));
        d.add(new IntField(LUCENE_ARTICLE_ID, a.getWid(), Field.Store.YES));
        d.add(new StringField(LUCENE_ARTICLE_WIKI_TITLE, a.getWikiTitle(),
                Field.Store.YES));
        d.add(new StringField(LUCENE_ARTICLE_TYPE, String.valueOf(a.getType()),
                Field.Store.YES));
        for (List<String> l : a.getLists()) {
            for (String e : l)
                d.add(new TextField(LUCENE_ARTICLE_LIST, e, Field.Store.NO));
        }
        Template t = a.getInfobox();
        d.add(new TextField(LUCENE_ARTICLE_INFOBOX, t.getName(),
                Field.Store.YES));
        for (String e : t.getDescription()) {
            d.add(new TextField(LUCENE_ARTICLE_INFOBOX, e, Field.Store.YES));
        }
        for (String e : a.getHighlights()) {
            d.add(new Field(LUCENE_ARTICLE_EMPH, e, STORE_TERM_VECTORS));
        }
        for (String e : a.getSections()) {
            d.add(new TextField(LUCENE_ARTICLE_SECTIONS, e, Field.Store.NO));
        }
        for (Link e : a.getLinks()) {
            d.add(new Field(LUCENE_ARTICLE_DESCRIPTIONS, cleaner.clean(e
                    .getDescription()), STORE_TERM_VECTORS));
            d.add(new Field(LUCENE_ARTICLE_LINKS, cleaner.clean(e.getCleanId()
                    .replace('_', ' ')), STORE_TERM_VECTORS));
        }
        d.add(new Field(LUCENE_ARTICLE_CONTENT, cleaner.clean(a.getText()),
                STORE_TERM_VECTORS));
        d.add(new Field(LUCENE_ARTICLE_SUMMARY, summarizer.getSummary(a),
                STORE_TERM_VECTORS));
        return d;
    }

    /**
     * Indexes a Wikipedia Article
     *
     * @param a
     *            the article to index
     */
    public void addDocument(Article a) {
        writer = getWriter();
        logger.debug("add doc {} ", a.getTitle());
        try {
            writer.addDocument(toLuceneDocument(a));
        } catch (Exception e) {
            logger.error("exception indexing a document: {} ({})",
                    a.getTitle(), e.toString());
            e.printStackTrace();
            System.exit(1);
        }
        logger.debug("added doc {}", a.getWid());
    }

    /**
     * Adds a Wikipedia Article (added just for testing)
     *
     * @param id
     *            - the id of the Wikipedia Article
     * @param content
     *            - the text of the Wikipedia Article
     */
    protected void addDocument(int id, String content) {
        Article a = new Article();
        a.setWid(id);
        a.setParagraphs(Arrays.asList(content));
        addDocument(a);
    }

    /**
     * Clears the index
     */
    public void clearIndex() {
        logger.info("deleting all the indexed documents");
        try {
            writer.deleteAll();
            writer.commit();
        } catch (IOException e) {
            logger.error("deleting the index: {}", e.toString());
            System.exit(1);
        }
    }

    public void commit() {
        try {
            writer.commit();
        } catch (Exception e) {
            logger.error("committing: {}", e.toString());
            System.exit(1);
        }
    }

    private Document getDoc(int wikiId) {
        IndexReader reader = getReader();
        if (wikiId <= 0)
            return null;
        int docId = getLuceneId(wikiId);
        if (docId < 0) {
            logger.warn("no id for wikiId {}", wikiId);
            return null;
        }
        logger.debug("get wikiId {} -> docId {}", wikiId, docId);
        Document doc = null;
        try {
            doc = reader.document(docId);
        } catch (Exception e) {
            logger.error("retrieving doc in position {} {}", docId,
                    e.toString());
            System.exit(-1);
        }
        return doc;
    }

    /**
     * @param query
     *            - a query
     * @param field
     *            - the field in which to search for the query
     * @return the number of documents containing the query text (as a phrase)
     *         in the given field
     */
    public int getFreq(String query, String field) {
        Query q = null;
        searcher = getSearcher();
        TopScoreDocCollector collector = TopScoreDocCollector.create(1, true);
        Text t = new Text(query).disableStopwords();
        PhraseQuery pq = new PhraseQuery();
        int i = 0;
        for (String term : t.getTerms()) {
            pq.add(new Term(field, term), i++);
        }
        q = pq;
        logger.debug(q.toString());
        try {
            searcher.search(q, collector);
        } catch (IOException e) {
            logger.error("querying the index: {} ", e.toString());
            return -1;
        }
        return collector.getTotalHits();
    }

    /**
     * @param query
     *            - a query
     * @return the number of documents containing the query text (as a phrase)
     *         in the default field
     */
    public int getFreq(String query) {
        return getFreq(query, LUCENE_ARTICLE_DEFAULT_FIELD);
    }

    public int getFreqFromSummary(String query) {
        return getFreq(query, LUCENE_ARTICLE_SUMMARY);
    }

    private IndexWriter getWriter() {
        if (writer == null) {
            try {
                writer = new IndexWriter(index, config);
            } catch (CorruptIndexException e1) {
                logger.error("creating the index: {}", e1.toString());
                System.exit(-1);
            } catch (LockObtainFailedException e1) {
                logger.error("creating the index: {}", e1.toString());
                System.exit(-1);
            } catch (IOException e1) {
                logger.error("creating the index: {}", e1.toString());
                System.exit(-1);
            }
        }
        return writer;
    }

    /**
     * @return the number of documents indexed
     */
    public int numDocs() {
        IndexReader reader = getReader();
        return reader.numDocs();
    }

    public void closeWriter() {
        try {
            writer.close();
        } catch (IOException e) {
            logger.error("closing the writer: {}", e.toString());
            System.exit(-1);
        }
    }
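    // Indexing sketch (illustrative only, not part of the original sources):
    // a minimal write path, assuming the caller already has parsed
    // json-wikipedia Article objects available ('articles' below is a
    // hypothetical source of them).
    //
    //   LuceneHelper helper = LuceneHelper.getDexterLuceneHelper();
    //   for (Article article : articles) {
    //       helper.addDocument(article);
    //   }
    //   helper.commit();
    //   helper.closeWriter();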
    /**
     * @param query
     *            entities containing the text of the query as a phrase
     *            (consecutive terms) will be returned.
     * @param field
     *            the field on which the query must be performed (summary,
     *            content, title ...).
     * @param n
     *            the max number of results to produce.
     * @return the wiki-ids of the top documents matching the query
     */
    public List<Integer> query(String query, String field, int n) {
        searcher = getSearcher();
        TopScoreDocCollector collector = TopScoreDocCollector.create(n, true);
        List<Integer> results = new ArrayList<Integer>();
        Query q = null;
        try {
            q = new QueryParser(Version.LUCENE_41, field,
                    new StandardAnalyzer(Version.LUCENE_41)).parse("\""
                    + query + "\"");
        } catch (ParseException e) {
            logger.error("querying the index: {} ", e.toString());
            return results;
        }
        try {
            searcher.search(q, collector);
        } catch (IOException e) {
            logger.error("querying the index: {} ", e.toString());
            return results;
        }
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        for (int i = 0; i < hits.length; ++i) {
            int docId = hits[i].doc;
            results.add(getWikiId(docId));
        }
        logger.debug("query {} docs {}", query, results);
        return results;
    }

    /**
     * @param query
     *            entities containing the text of the query as a phrase
     *            (consecutive terms) will be returned.
     * @param field
     *            the field on which the query must be performed (summary,
     *            content, title ...).
     * @return the wiki-ids of the top documents matching the query (at most
     *         10000 results)
     */
    public List<Integer> query(String query, String field) {
        return query(query, field, 10000);
    }

    /**
     * @return the wiki-ids of the top documents matching the query on the
     *         default field
     */
    public List<Integer> query(String query) {
        return query(query, LUCENE_ARTICLE_DEFAULT_FIELD);
    }

    /**
     * Retrieves an article from the index
     *
     * @param id
     *            - the Wikipedia Id of the Article
     * @return the document from the index
     */
    public Article getArticle(int id) {
        Article a = new Article();
        a.setWikiId(id);
        Document d = getDoc(id);
        if (d != null) {
            List<String> paragraphs = new ArrayList<String>();
            paragraphs.add(d.getField(LUCENE_ARTICLE_CONTENT).stringValue());
            a.setTitle(d.getField(LUCENE_ARTICLE_TITLE).stringValue());
            a.setWikiTitle(d.getField(LUCENE_ARTICLE_WIKI_TITLE).stringValue());
            a.setSummary(d.getField(LUCENE_ARTICLE_SUMMARY).stringValue());
            a.setParagraphs(paragraphs);
        }
        return a;
    }

    /**
     * Retrieves only the article summary and the title from the index
     *
     * @param id
     *            - the Wikipedia Id of the Article
     * @return the document from the index
     */
    public Article getArticleSummary(int id) {
        Article a = new Article();
        a.setWikiId(id);
        Document d = getDoc(id);
        if (d != null) {
            a.setWikiTitle(d.getField(LUCENE_ARTICLE_WIKI_TITLE).stringValue());
            a.setTitle(d.getField(LUCENE_ARTICLE_TITLE).stringValue());
            a.setSummary(d.getField(LUCENE_ARTICLE_SUMMARY).stringValue());
        }
        return a;
    }

    public int getWikiId(int luceneId) {
        IndexReader reader = getReader();
        Document doc = null;
        try {
            doc = reader.document(luceneId);
        } catch (Exception e) {
            logger.error("retrieving doc in position {} {}", luceneId,
                    e.toString());
            System.exit(-1);
        }
        return Integer.parseInt(doc.get(LUCENE_ARTICLE_ID));
    }
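    // Retrieval sketch (illustrative only, not part of the original sources):
    // looking up the best-matching article for a phrase and reading back its
    // stored summary fields; the query string is hypothetical.
    //
    //   List<Integer> ids = helper.query("theory of relativity", "title", 1);
    //   if (!ids.isEmpty()) {
    //       Article top = helper.getArticleSummary(ids.get(0));
    //       System.out.println(top.getTitle() + ": " + top.getSummary());
    //   }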
    /**
     * Sorts a list of entities by their similarity with the string context.
     *
     * @param spot
     *            - the spot for which the entities are sorted
     * @param eml
     *            - the entity list to sort
     * @param context
     *            - the context text; entities are sorted based on their
     *            similarity with the context.
     * @param field
     *            - sort the entities based on the similarity between their
     *            text in this field and the context.
     */
    @SuppressWarnings("null")
    public void rankBySimilarity(SpotMatch spot, EntityMatchList eml,
            String context, String field) {
        if (context.trim().isEmpty()) {
            logger.warn("no context for spot {}", spot.getMention());
            return;
        }
        Query q = null;
        try {
            // remove all non-alphanumeric chars
            context = context.replaceAll("[^A-Za-z0-9 ]", " ");
            q = new QueryParser(Version.LUCENE_41, "content",
                    new StandardAnalyzer(Version.LUCENE_41))
                    .parse(QueryParser.escape(context));
        } catch (ParseException e) {
            logger.error("querying the index: {} ", e.toString());
            return;
        }
        for (EntityMatch e : eml) {
            Integer luceneId = getLuceneId(e.getId());
            float score = 0.5f; // smoothing
            if (luceneId == null || luceneId < 0) {
                // no document in the index for this entity: keep only the
                // smoothing score
            } else {
                score += getSimilarity(q, e.getId());
            }
            e.setScore(score);
        }
        return;
    }

    /**
     * Sorts a list of entities by their similarity (full text) with the string
     * context.
     *
     * @param spot
     *            - the spot for which the entities are sorted
     * @param eml
     *            - the entity list to sort
     * @param context
     *            - the context text; entities are sorted based on their
     *            similarity with the context.
     */
    public void rankBySimilarity(SpotMatch spot, EntityMatchList eml,
            String context) {
        rankBySimilarity(spot, eml, context, LUCENE_ARTICLE_DEFAULT_FIELD);
        return;
    }
}
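Below is a minimal end-to-end sketch of how the class above might be driven from a separate file. It is illustrative only: it assumes an already-built Dexter Lucene index and uses a hypothetical query string; it is not part of the original sources.

package it.cnr.isti.hpc.dexter.lucene;

import java.util.List;

import it.cnr.isti.hpc.wikipedia.article.Article;

public class LuceneHelperExample {

    public static void main(String[] args) {
        if (!LuceneHelper.hasDexterLuceneIndex()) {
            System.err.println("no Dexter Lucene index found");
            return;
        }
        LuceneHelper helper = LuceneHelper.getDexterLuceneHelper();

        // phrase query on the article content, keeping the top 5 matches
        List<Integer> ids = helper.query("information retrieval", "content", 5);
        for (int wikiId : ids) {
            Article a = helper.getArticleSummary(wikiId);
            System.out.println(wikiId + "\t" + a.getTitle());
        }

        // cosine similarity between the two best matches (if any)
        if (ids.size() >= 2) {
            double sim = helper.getCosineSimilarity(ids.get(0), ids.get(1));
            System.out.println("cosine similarity: " + sim);
        }
    }
}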