edu.pucp.igc.piscosemanticsearch.Buscador.java Source code

Java tutorial

Introduction

Here is the source code for edu.pucp.igc.piscosemanticsearch.Buscador.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package edu.pucp.igc.piscosemanticsearch;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.lucene.analysis.es.SpanishAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Searcher over a Lucene index stored in a folder on disk.
 *
 * <p>Lifecycle: construct an instance, call {@link #crearBuscador()} to open the index,
 * run any number of searches, and finally call {@link #cerrarBuscador()} to release the
 * index resources. Search methods throw {@link IllegalStateException} when the index is
 * not open.
 *
 * @author ilet
 */
public class Buscador {

    private static final Logger LOGGER = Logger.getLogger(Buscador.class.getName());

    /** Name of the index folder, resolved against the web application root. */
    private static final String NOMBRE_CARPETA_INDICE = "indice";

    /** Maximum number of hits requested by the general-purpose searches. */
    private static final int MAX_RESULTADOS = 100;

    /** Maximum number of hits requested by {@link #buscarDocumentosDePisco(String)}. */
    private static final int MAX_RESULTADOS_PISCO = 1000;

    /** Index folder; {@code null} when it could not be resolved at construction time. */
    private final File fileCarpetaIndice;
    private IndexSearcher indexSearcher;
    private Directory directorio;
    private DirectoryReader directorioLectura;
    private SpanishAnalyzer analizador;

    /**
     * Creates a searcher over the index stored in the given folder.
     *
     * @param fileCarpetaIndice folder that contains the Lucene index
     */
    public Buscador(File fileCarpetaIndice) {
        this.fileCarpetaIndice = fileCarpetaIndice;
    }

    /**
     * Creates a searcher over the {@code indice} folder located in the root of the web
     * application that is serving {@code req}.
     *
     * @param req  request used to resolve the real path of the web application root
     * @param resp not used; kept for compatibility with existing callers
     */
    public Buscador(HttpServletRequest req, HttpServletResponse resp) {
        String path = req.getServletContext().getRealPath("/");
        // File(parent, child) inserts the separator only when needed, so the result is the
        // same whether or not the container returns the root path with a trailing separator.
        // A null real path is remembered and reported by crearBuscador() as an IOException.
        fileCarpetaIndice = (path == null) ? null : new File(path, NOMBRE_CARPETA_INDICE);
    }

    /**
     * Opens the index folder and prepares the searcher and the analyzer. If the searcher
     * was already open, the previous resources are released first.
     *
     * @throws IOException if the index folder is unknown or the index cannot be opened
     */
    public void crearBuscador() throws IOException {
        if (fileCarpetaIndice == null) {
            throw new IOException("The index folder could not be resolved");
        }
        // Release anything left from a previous call so it is not leaked.
        cerrarBuscador();

        Directory nuevoDirectorio = FSDirectory.open(fileCarpetaIndice);
        DirectoryReader nuevoLector;
        try {
            nuevoLector = DirectoryReader.open(nuevoDirectorio);
        } catch (IOException | RuntimeException ex) {
            // Do not leak the directory when the reader cannot be opened.
            try {
                nuevoDirectorio.close();
            } catch (IOException closeEx) {
                ex.addSuppressed(closeEx);
            }
            throw ex;
        }

        directorio = nuevoDirectorio;
        directorioLectura = nuevoLector;
        indexSearcher = new IndexSearcher(nuevoLector);
        analizador = new SpanishAnalyzer();
    }

    /**
     * Releases the index reader, the directory and the analyzer. Does nothing for the
     * resources that were never opened, so it is safe to call more than once.
     *
     * @throws IOException if closing the reader or the directory fails
     */
    public void cerrarBuscador() throws IOException {
        DirectoryReader lector = directorioLectura;
        Directory carpeta = directorio;
        SpanishAnalyzer analizadorAbierto = analizador;

        directorioLectura = null;
        directorio = null;
        indexSearcher = null;
        analizador = null;

        try {
            if (lector != null) {
                lector.close();
            }
        } finally {
            try {
                if (carpeta != null) {
                    carpeta.close();
                }
            } finally {
                if (analizadorAbierto != null) {
                    analizadorAbierto.close();
                }
            }
        }
    }

    /**
     * Runs one query text per field and wraps the hits (at most {@value #MAX_RESULTADOS}).
     *
     * @param textosABuscar    query texts, one per entry of {@code camposAConsultar}
     * @param camposAConsultar index fields to search
     * @return the matching documents together with the first query text and the parsed
     *         query rendered as a string
     * @throws ParseException           if a query text cannot be parsed
     * @throws IOException              if reading the index fails
     * @throws IllegalArgumentException if {@code textosABuscar} is null or empty
     * @throws IllegalStateException    if the index is not open
     */
    public ResultadosDeBusqueda buscar(String[] textosABuscar, String[] camposAConsultar)
            throws ParseException, IOException {
        IndexSearcher buscador = buscadorAbierto();
        if (textosABuscar == null || textosABuscar.length == 0) {
            throw new IllegalArgumentException("textosABuscar must contain at least one query text");
        }

        // Reuse the analyzer created by crearBuscador() instead of allocating one per search.
        Query query = MultiFieldQueryParser.parse(textosABuscar, camposAConsultar, analizador);
        ScoreDoc[] listaScoreDoc = buscador.search(query, MAX_RESULTADOS).scoreDocs;

        return new ResultadosDeBusqueda(ScoreDocsToDocumentos(listaScoreDoc),
                textosABuscar[0], query.toString());
    }

    /**
     * Searches a single field for a single query text.
     *
     * @param textoABuscar    query text
     * @param campoAConsultar index field to search
     * @return the search results, as produced by {@link #buscar(String[], String[])}
     * @throws ParseException if the query text cannot be parsed
     * @throws IOException    if reading the index fails
     */
    public ResultadosDeBusqueda buscar(String textoABuscar, String campoAConsultar)
            throws ParseException, IOException {
        return buscar(new String[] { textoABuscar }, new String[] { campoAConsultar });
    }

    /**
     * Searches the {@code texto} and {@code titulo} fields for the same query text.
     *
     * @param textoABuscar query text
     * @return the search results, as produced by {@link #buscar(String[], String[])}
     * @throws ParseException if the query text cannot be parsed
     * @throws IOException    if reading the index fails
     */
    public ResultadosDeBusqueda busquedaTextual(String textoABuscar) throws ParseException, IOException {
        return buscar(new String[] { textoABuscar, textoABuscar }, new String[] { "texto", "titulo" });
    }

    /**
     * Searches the {@code clases} field for the query text.
     *
     * @param textoABuscar query text
     * @return the search results, as produced by {@link #buscar(String[], String[])}
     * @throws ParseException if the query text cannot be parsed
     * @throws IOException    if reading the index fails
     */
    public ResultadosDeBusqueda busquedaSemantica(String textoABuscar) throws ParseException, IOException {
        return buscar(new String[] { textoABuscar }, new String[] { "clases" });
    }

    /**
     * Alias of {@link #buscar(String[], String[])}.
     *
     * @param textosABuscar    query texts, one per entry of {@code camposAConsultar}
     * @param camposAConsultar index fields to search
     * @return the search results
     * @throws ParseException if a query text cannot be parsed
     * @throws IOException    if reading the index fails
     */
    public ResultadosDeBusqueda busquedaMultiple(String[] textosABuscar, String[] camposAConsultar)
            throws ParseException, IOException {
        return buscar(textosABuscar, camposAConsultar);
    }

    /**
     * Loads the stored document of every hit and converts it to a {@code Documento} built
     * from its {@code baseURL}, {@code titulo}, {@code texto} and {@code clases} fields.
     *
     * @param listaScoreDoc hits returned by a search on this searcher
     * @return one {@code Documento} per hit, in the same order
     * @throws IOException           if reading the index fails
     * @throws IllegalStateException if the index is not open
     */
    public List<Documento> ScoreDocsToDocumentos(ScoreDoc[] listaScoreDoc) throws IOException {
        IndexSearcher buscador = buscadorAbierto();
        List<Documento> documentos = new ArrayList<>(listaScoreDoc.length);
        for (ScoreDoc scoreDoc : listaScoreDoc) {
            Document d = buscador.doc(scoreDoc.doc);
            documentos.add(new Documento(d.get("baseURL"), d.get("titulo"), d.get("texto"), d.get("clases")));
        }
        return documentos;
    }

    /**
     * Searches the {@code texto} field and prints the parsed query and the {@code texto}
     * value of every hit to standard output.
     *
     * @param textoABuscar query text
     * @return the hits (at most {@value #MAX_RESULTADOS})
     * @throws ParseException        if the query text cannot be parsed
     * @throws IOException           if reading the index fails
     * @throws IllegalStateException if the index is not open
     */
    public ScoreDoc[] buscarDocumentos(String textoABuscar) throws ParseException, IOException {
        IndexSearcher buscador = buscadorAbierto();
        QueryParser parser = new QueryParser("texto", analizador);

        Query query = parser.parse(textoABuscar);
        System.out.println("Busqueda textual: " + query);
        ScoreDoc[] listaScoreDoc = buscador.search(query, MAX_RESULTADOS).scoreDocs;

        visualizarDocumentos(listaScoreDoc);
        return listaScoreDoc;
    }

    /**
     * Searches the {@code pisco} field and prints the {@code pisco} value of every hit to
     * standard output. Parse and I/O failures are logged instead of being thrown.
     *
     * @param textoABuscar query text
     * @return the hits (at most {@value #MAX_RESULTADOS_PISCO}), or {@code null} when the
     *         search failed
     * @throws IllegalStateException if the index is not open
     */
    public ScoreDoc[] buscarDocumentosDePisco(String textoABuscar) {
        IndexSearcher buscador = buscadorAbierto();
        ScoreDoc[] listaScoreDoc = null;
        try {
            Query query = MultiFieldQueryParser.parse(new String[] { textoABuscar }, new String[] { "pisco" },
                    analizador);
            listaScoreDoc = buscador.search(query, MAX_RESULTADOS_PISCO).scoreDocs;
            visualizarDocumentosDePisco(listaScoreDoc);
        } catch (ParseException | IOException ex) {
            LOGGER.log(Level.SEVERE, "Search on field 'pisco' failed", ex);
        }
        return listaScoreDoc;
    }

    /**
     * Searches the {@code clase} field and returns the {@code pisco} value of every hit,
     * printing the parsed query and each value to standard output. Parse and I/O failures
     * are logged instead of being thrown.
     *
     * @param textoABuscar query text
     * @return the {@code pisco} values of the hits (at most {@value #MAX_RESULTADOS}), or
     *         {@code null} when the query text is null or empty or the search failed
     * @throws IllegalStateException if the index is not open and the query text is not empty
     */
    public List<String> buscarDocumentosPorClase(String textoABuscar) {
        // A term without any characteristic yields null so that callers can skip it.
        if (textoABuscar == null || textoABuscar.isEmpty()) {
            return null;
        }

        IndexSearcher buscador = buscadorAbierto();
        List<String> listaStringDoc = null;
        try {
            // NOTE(review): this queries "clase" while the other methods use "clases";
            // confirm against the indexer which field name is the intended one.
            QueryParser parser = new QueryParser("clase", analizador);

            Query query = parser.parse(textoABuscar);
            System.out.println("Semntica: " + query);
            ScoreDoc[] listaScoreDoc = buscador.search(query, MAX_RESULTADOS).scoreDocs;

            listaStringDoc = visualizarDocumentosDePisco(listaScoreDoc);
        } catch (ParseException | IOException ex) {
            LOGGER.log(Level.SEVERE, "Search on field 'clase' failed", ex);
        }
        return listaStringDoc;
    }

    /** Prints the {@code texto} field of every hit to standard output. */
    private void visualizarDocumentos(ScoreDoc[] listaScoreDoc) throws IOException {
        IndexSearcher buscador = buscadorAbierto();
        for (ScoreDoc scoreDoc : listaScoreDoc) {
            Document document = buscador.doc(scoreDoc.doc);
            System.out.println(document.get("texto"));
        }
    }

    /** Collects the {@code pisco} field of every hit, printing each value to standard output. */
    private List<String> visualizarDocumentosDePisco(ScoreDoc[] listaScoreDoc) throws IOException {
        IndexSearcher buscador = buscadorAbierto();
        List<String> listaDeDocumentos = new ArrayList<>(listaScoreDoc.length);
        for (ScoreDoc scoreDoc : listaScoreDoc) {
            Document document = buscador.doc(scoreDoc.doc);
            String texto = document.get("pisco");
            listaDeDocumentos.add(texto);
            System.out.println(texto);
        }
        return listaDeDocumentos;
    }

    /** Returns the open searcher, or fails with a clear message when the index is not open. */
    private IndexSearcher buscadorAbierto() {
        if (indexSearcher == null) {
            throw new IllegalStateException("The index is not open; call crearBuscador() first");
        }
        return indexSearcher;
    }

}