// Java tutorial
/* * Copyright (C) 2008-2015 by Holger Arndt * * This file is part of the Universal Java Matrix Package (UJMP). * See the NOTICE file distributed with this work for additional * information regarding copyright ownership and licensing. * * UJMP is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * UJMP is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with UJMP; if not, write to the * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301 USA */ package org.ujmp.lucene; import java.io.Closeable; import java.io.File; import java.io.Flushable; import java.io.IOException; import java.io.Serializable; import java.util.HashSet; import java.util.Set; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.StoredField; import org.apache.lucene.document.StringField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.store.Directory; import 
org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.ujmp.core.collections.map.AbstractMap; import org.ujmp.core.interfaces.Erasable; import org.ujmp.core.objectmatrix.ObjectMatrix2D; import org.ujmp.core.util.SerializationUtil; import org.ujmp.core.util.StringUtil; import org.ujmp.core.util.io.FileUtil; public class LuceneMap<K, V> extends AbstractMap<K, V> implements Flushable, Closeable, Erasable { private static final long serialVersionUID = 8998898900190996038L; private static final String KEYSTRING = "KS"; private static final String KEYDATA = "KD"; private static final String VALUESTRING = "VS"; private static final String VALUEDATA = "VD"; private transient Directory directory = null; private transient IndexWriter indexWriter = null; private transient IndexSearcher indexSearcher = null; private static final int MAXSEARCHSIZE = 1000000; private boolean readOnly = false; private transient File path = null; private transient Analyzer analyzer = null; public LuceneMap() throws IOException { this(null, false); } public LuceneMap(String dir) throws IOException { this(new File(dir)); } public LuceneMap(File dir) throws IOException { this(dir, false); } public LuceneMap(File path, boolean readOnly) throws IOException { this.readOnly = readOnly; this.path = path; } public synchronized Directory getDirectory() throws IOException { if (directory == null) { directory = FSDirectory.open(getPath()); } return directory; } public synchronized File getPath() throws IOException { if (path == null) { path = File.createTempFile("lucene_map_", ".tmp"); path.delete(); path.mkdir(); } return path; } public synchronized void clear() { try { getIndexWriter().deleteAll(); } catch (Exception e) { throw new RuntimeException("cannot clear index", e); } } public synchronized boolean containsKey(Object key) { try { Term term = new Term(KEYSTRING, getUniqueString(key)); return getIndexSearcher().getIndexReader().docFreq(term) > 0; } catch (Exception e) { 
throw new RuntimeException("could not search documents: " + key, e); } } public synchronized boolean containsValue(Object value) { try { Term term = new Term(VALUESTRING, getUniqueString(value)); return getIndexSearcher().getIndexReader().docFreq(term) > 0; } catch (Exception e) { throw new RuntimeException("could not search documents: " + value, e); } } @SuppressWarnings("unchecked") public synchronized V get(Object key) { try { Term term = new Term(KEYSTRING, getUniqueString(key)); TermQuery query = new TermQuery(term); TopDocs docs = getIndexSearcher().search(query, 1); if (docs.totalHits > 0) { ScoreDoc match = docs.scoreDocs[0]; Document doc = getIndexSearcher().doc(match.doc); return (V) SerializationUtil.deserialize(doc.getBinaryValue(VALUEDATA).bytes); } } catch (Exception e) { throw new RuntimeException("could not search documents: " + key, e); } return null; } public synchronized ObjectMatrix2D search(String searchString) { try { MultiFieldQueryParser p = new MultiFieldQueryParser(Version.LUCENE_47, new String[] { VALUESTRING }, getAnalyzer()); Query query = p.parse(searchString); TopDocs docs = getIndexSearcher().search(query, 100); ObjectMatrix2D result = ObjectMatrix2D.Factory.zeros(docs.totalHits, 3); for (int row = 0; row < docs.totalHits; row++) { ScoreDoc match = docs.scoreDocs[row]; Document doc = getIndexSearcher().doc(match.doc); result.setAsFloat(match.score, row, 0); result.setAsObject(SerializationUtil.deserialize(doc.getBinaryValue(KEYDATA).bytes), row, 1); result.setAsObject(SerializationUtil.deserialize(doc.getBinaryValue(VALUEDATA).bytes), row, 2); } return result; } catch (Exception e) { throw new RuntimeException("could not search documents: " + searchString, e); } } @SuppressWarnings("unchecked") public synchronized Set<K> keySet() { Set<K> set = new HashSet<K>(); if (isEmpty()) { return set; } try { Term term = new Term(KEYSTRING, "*"); WildcardQuery query = new WildcardQuery(term); TopDocs docs = getIndexSearcher().search(query, 
MAXSEARCHSIZE); for (ScoreDoc sd : docs.scoreDocs) { Document d = getIndexSearcher().doc(sd.doc); set.add((K) SerializationUtil.deserialize(d.getBinaryValue(KEYDATA).bytes)); } return set; } catch (Exception e) { throw new RuntimeException("could not search documents", e); } } private static String getUniqueString(Object o) throws IOException { if (o == null) { return ""; } else if (o instanceof String) { return (String) o; } else { return StringUtil.encodeToHex((Serializable) o); } } public synchronized V put(K key, V value) { try { Term term = new Term(KEYSTRING, getUniqueString(key)); Document doc = new Document(); doc.add(new StringField(KEYSTRING, getUniqueString(key), Field.Store.YES)); doc.add(new StoredField(KEYDATA, SerializationUtil.serialize((Serializable) key))); doc.add(new StringField(VALUESTRING, getUniqueString(value), Field.Store.YES)); doc.add(new StoredField(VALUEDATA, SerializationUtil.serialize((Serializable) value))); getIndexWriter().updateDocument(term, doc); return null; } catch (Exception e) { throw new RuntimeException("could not add document: " + key, e); } } public synchronized V remove(Object key) { try { Term term = new Term(KEYSTRING, getUniqueString(key)); getIndexWriter().deleteDocuments(term); return null; } catch (Exception e) { throw new RuntimeException("could not delete document: " + key, e); } } public Analyzer getAnalyzer() { if (analyzer == null) { analyzer = new StandardAnalyzer(Version.LUCENE_47); } return analyzer; } public void setAnalyzer(Analyzer analyzer) { this.analyzer = analyzer; } public synchronized int size() { try { flush(); if (indexSearcher != null) { return indexSearcher.getIndexReader().numDocs(); } else { int size = getIndexWriter().numDocs(); return size; } } catch (Exception e) { throw new RuntimeException("could not count documents", e); } } public synchronized void flush() throws IOException { IndexWriter iw = getIndexWriter(); iw.commit(); iw.close(true); indexWriter = null; } public synchronized 
void close() throws IOException { if (indexWriter != null) { indexWriter.close(true); } if (indexSearcher != null) { indexSearcher.getIndexReader().close(); } if (indexWriter != null) { indexWriter.close(true); indexWriter = null; } if (indexSearcher != null) { indexSearcher.getIndexReader().close(); indexSearcher = null; } } private synchronized IndexWriter getIndexWriter() { try { if (!readOnly && indexSearcher != null && indexSearcher.getIndexReader().getRefCount() > 0) { indexSearcher.getIndexReader().close(); indexSearcher = null; } if (indexWriter == null) { if (DirectoryReader.indexExists(getDirectory())) { if (!readOnly) { if (IndexWriter.isLocked(getDirectory())) { IndexWriter.unlock(getDirectory()); } indexWriter = new IndexWriter(getDirectory(), new IndexWriterConfig(Version.LUCENE_47, getAnalyzer())); } } else { if (!readOnly) { indexWriter = new IndexWriter(getDirectory(), new IndexWriterConfig(Version.LUCENE_47, getAnalyzer())); } } } return indexWriter; } catch (Exception e) { throw new RuntimeException("could not prepare writher", e); } } private synchronized IndexSearcher getIndexSearcher() { try { if (!DirectoryReader.indexExists(getDirectory())) { getIndexWriter(); } if (indexSearcher == null) { indexSearcher = new IndexSearcher(DirectoryReader.open(getIndexWriter(), true)); } return indexSearcher; } catch (Exception e) { throw new RuntimeException("could not prepare reader", e); } } public synchronized void erase() throws IOException { clear(); close(); FileUtil.deleteRecursive(path); } }