de.anycook.db.lucene.FulltextIndex.java Source code

Java tutorial

Introduction

Here is the source code for de.anycook.db.lucene.FulltextIndex.java

Source

/*
 * This file is part of anycook. The new internet cookbook
 * Copyright (C) 2014 Jan Graegger
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

package de.anycook.db.lucene;

import de.anycook.conf.Configuration;
import de.anycook.db.mysql.DBGetRecipe;
import de.anycook.db.mysql.DBRecipe;
import de.anycook.recipe.Recipe;
import de.anycook.recipe.step.Step;
import de.anycook.recipe.step.Steps;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.NIOFSDirectory;

import java.io.IOException;
import java.nio.file.Paths;
import java.sql.SQLException;
import java.util.Date;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;

/**
 * Shared access point to the Lucene full-text index of recipes.
 *
 * <p>The index is stored in the directory returned by
 * {@code Configuration.getInstance().getFullTextIndexPath()}. Every recipe becomes one document
 * with the fields {@code title}, {@code description}, {@code steps}, {@code version_id} and
 * {@code date}; the text fields are analyzed with {@link NGramAnalyzer}.
 *
 * <p>Instances are obtained through {@link #init()}; the constructor is private.
 */
public class FulltextIndex {

    /** Fields that {@link #search(String)} queries. */
    private static final String[] SEARCH_FIELDS = { "description", "steps" };

    /** Maximum number of hits a single {@link #search(String)} call collects. */
    private static final int MAX_HITS = 1000;

    private static FulltextIndex singleton = null;

    private final Logger logger;
    private final Analyzer analyzer = new NGramAnalyzer();
    private final Directory index;

    /**
     * Opens the index directory configured in {@code Configuration}.
     *
     * @throws IOException if the directory cannot be opened
     */
    private FulltextIndex() throws IOException {
        logger = LogManager.getLogger(getClass());
        String indexPath = Configuration.getInstance().getFullTextIndexPath();
        index = new NIOFSDirectory(Paths.get(indexPath));
    }

    /**
     * Returns the shared instance, creating it on the first call.
     *
     * <p>The method is synchronized so that two concurrent first calls cannot each create their
     * own instance.
     *
     * @return the shared {@code FulltextIndex}
     * @throws IOException if the index directory cannot be opened
     */
    public static synchronized FulltextIndex init() throws IOException {
        if (singleton == null) {
            singleton = new FulltextIndex();
            LogManager.getLogger(FulltextIndex.class).info("created new instance of FulltextIndex");
        }

        return singleton;
    }

    /**
     * Creates a fresh writer configuration; a new one is built for every {@link IndexWriter}
     * this class opens.
     */
    private IndexWriterConfig createIndexWriterConfig() {
        return new IndexWriterConfig(analyzer);
    }

    /**
     * Adds a recipe to the index, first removing an existing entry if
     * {@link #checkIfRecipeExists(String)} reports one.
     *
     * @param recipeName name of the recipe; it is lower-cased with {@link Locale#GERMAN} before use
     * @throws SQLException if loading the recipe data fails
     * @throws DBRecipe.RecipeNotFoundException if the recipe cannot be loaded
     * @throws IOException if reading from or writing to the index fails
     */
    public void addRecipe(String recipeName) throws SQLException, DBRecipe.RecipeNotFoundException, IOException {
        recipeName = recipeName.toLowerCase(Locale.GERMAN);
        if (checkIfRecipeExists(recipeName)) {
            removeRecipe(recipeName);
        }

        // Same document layout as a full rebuild; built before the writer is opened.
        Document doc = generateRecipeDoc(recipeName);

        try (IndexWriter writer = new IndexWriter(index, createIndexWriterConfig())) {
            writer.addDocument(doc);
            writer.commit();

            logger.info("added " + recipeName + " to index");
        } catch (CorruptIndexException | LockObtainFailedException e) {
            throw new IOException(e);
        }
    }

    /**
     * Deletes every document from the index.
     *
     * @throws IOException if the index cannot be written
     */
    public void clearIndex() throws IOException {
        try (IndexWriter writer = new IndexWriter(index, createIndexWriterConfig())) {
            writer.deleteAll();
            writer.commit();
            logger.info("cleared index");
        } catch (CorruptIndexException e) {
            throw new IOException(e);
        }
    }

    /**
     * Rebuilds the index from all recipe names returned by
     * {@code DBGetRecipe.getAllActiveRecipeNames()}.
     *
     * <p>Existing documents are deleted and the new ones are committed together at the end.
     * Recipes that cannot be loaded are skipped with a warning so that one bad entry does not
     * abort the rebuild.
     *
     * @throws SQLException if loading recipe data fails
     * @throws IOException if the index cannot be written
     */
    public void addAllRecipes() throws SQLException, IOException {
        try (DBGetRecipe dbGetRecipe = new DBGetRecipe();
                IndexWriter writer = new IndexWriter(index, createIndexWriterConfig())) {
            writer.deleteAll();
            for (String recipeName : dbGetRecipe.getAllActiveRecipeNames()) {
                try {
                    writer.addDocument(generateRecipeDoc(recipeName));
                } catch (DBRecipe.RecipeNotFoundException e) {
                    // Skip the recipe but leave a trace instead of dropping it silently.
                    logger.warn("skipped " + recipeName + " while building fulltext index", e);
                }
            }
            writer.commit();
            logger.info("successfully build fulltext index");
        }
    }

    /**
     * Builds the index document for one recipe.
     *
     * <p>The step texts are concatenated, each preceded by a single space, into the
     * {@code steps} field. A {@code null} description is indexed as the empty string. The
     * {@code date} field holds the current date at day resolution, i.e. the time of indexing.
     *
     * @param recipeName name of the recipe to load
     * @return a document with the fields {@code title}, {@code description}, {@code steps},
     *         {@code version_id} and {@code date}
     * @throws SQLException if loading the recipe data fails
     * @throws DBRecipe.RecipeNotFoundException if the recipe cannot be loaded
     */
    public Document generateRecipeDoc(String recipeName) throws SQLException, DBRecipe.RecipeNotFoundException {
        try (DBGetRecipe dbGetRecipe = new DBGetRecipe()) {
            Recipe recipe = dbGetRecipe.get(recipeName);

            List<Step> steps = Steps.loadRecipeSteps(recipeName);
            StringBuilder stepText = new StringBuilder();
            for (Step step : steps) {
                stepText.append(" ").append(step.getText());
            }

            int id = dbGetRecipe.getActiveIdfromRecipe(recipeName);
            String date = DateTools.dateToString(new Date(), Resolution.DAY);
            String description = recipe.getDescription() == null ? "" : recipe.getDescription();

            Document doc = new Document();
            doc.add(new TextField("title", recipe.getName(), Field.Store.YES));
            doc.add(new TextField("description", description, Field.Store.YES));
            doc.add(new TextField("steps", stepText.toString(), Field.Store.YES));
            doc.add(new IntField("version_id", id, Field.Store.YES));
            doc.add(new TextField("date", date, Field.Store.YES));

            return doc;
        }
    }

    /**
     * Deletes the documents whose {@code title} field contains the term {@code recipeName}.
     *
     * @param recipeName term to delete by; it is used verbatim, without analysis
     * @throws IOException if the index cannot be written
     */
    public void removeRecipe(String recipeName) throws IOException {
        try (IndexWriter writer = new IndexWriter(index, createIndexWriterConfig())) {
            // NOTE(review): "title" is an analyzed TextField, while a Term is matched verbatim
            // against the indexed tokens. Whether the full recipe name is ever emitted as a single
            // token depends on NGramAnalyzer -- confirm that this delete actually matches.
            writer.deleteDocuments(new Term("title", recipeName));
            writer.commit();

            logger.info("removed " + recipeName + " from index");
        } catch (CorruptIndexException e) {
            throw new IOException(e);
        }
    }

    /**
     * Reports whether a query for {@code recipeName} on the {@code title} field has any hit.
     *
     * @param recipeName name to look for; query-syntax characters in it are escaped
     * @return {@code true} if at least one document matches, {@code false} if none matches or no
     *         index has been written yet
     * @throws IOException if the index cannot be read or the name cannot be parsed as a query
     */
    private boolean checkIfRecipeExists(String recipeName) throws IOException {
        // Opening a reader on a directory without an index throws; treat it as "nothing indexed".
        if (!DirectoryReader.indexExists(index)) {
            return false;
        }

        try (IndexReader reader = DirectoryReader.open(index)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            // Escape so that characters such as '!', ':' or '?' in a recipe name are matched
            // literally instead of being read as query operators.
            Query query = new QueryParser("title", analyzer).parse(QueryParser.escape(recipeName));
            return searcher.search(query, 1).totalHits > 0;
        } catch (CorruptIndexException | ParseException e) {
            throw new IOException(e);
        }
    }

    /**
     * Searches the {@code description} and {@code steps} fields.
     *
     * <p>{@code q} is parsed as Lucene query syntax. If it cannot be parsed, or the index is
     * reported corrupt, the error is logged and the results collected so far (possibly none) are
     * returned.
     *
     * @param q query string in Lucene query syntax
     * @return the stored {@code title} values of up to {@value #MAX_HITS} hits, in hit order
     * @throws IOException if the index cannot be opened or read
     */
    public Set<String> search(String q) throws IOException {
        Set<String> recipes = new LinkedHashSet<>();
        logger.debug("searching for {}", q);

        try (IndexReader reader = DirectoryReader.open(index)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Query query = new MultiFieldQueryParser(SEARCH_FIELDS, analyzer).parse(q);

            for (ScoreDoc hit : searcher.search(query, MAX_HITS).scoreDocs) {
                recipes.add(searcher.doc(hit.doc).get("title"));
            }
        } catch (CorruptIndexException | ParseException e) {
            // Pass the throwable separately so the stack trace and the failing query are logged.
            logger.error("search for '" + q + "' failed", e);
        }

        logger.debug("found {} results", recipes.size());
        return recipes;
    }

    /**
     * Runs {@link #search(String)} for every query and returns the union of the results.
     *
     * @param queries query strings in Lucene query syntax
     * @return the titles found by any of the queries, in no particular order
     * @throws IOException if the index cannot be opened or read
     */
    public Set<String> search(Set<String> queries) throws IOException {
        Set<String> recipeNames = new HashSet<>();
        for (String term : queries) {
            recipeNames.addAll(search(term));
        }

        return recipeNames;
    }
}