drakkar.mast.retrieval.LuceneContext.java Source code

Introduction

Here is the source code for drakkar.mast.retrieval.LuceneContext.java
Source

/*
 * DrakkarKeel - An Enterprise Collaborative Search Platform
 *
 * The contents of this file are subject under the terms described in the
 * DRAKKARKEEL_LICENSE file included in this distribution; you may not use this
 * file except in compliance with the License. 
 *
 * 2013-2014 DrakkarKeel Platform.
 */
package drakkar.mast.retrieval;

import drakkar.oar.DocumentMetaData;
import drakkar.oar.facade.event.FacadeDesktopListener;
import static drakkar.oar.util.KeyField.*;
import static drakkar.oar.util.KeyMessage.*;
import drakkar.oar.util.KeySearchable;
import drakkar.oar.util.OutputMonitor;
import drakkar.oar.util.Utilities;
import drakkar.mast.IndexException;
import drakkar.mast.SearchException;
import drakkar.mast.recommender.CollectionInfo;
import drakkar.mast.recommender.DocInfo;
import drakkar.mast.recommender.DocTermInfo;
import drakkar.mast.recommender.TermInfo;
import drakkar.mast.retrieval.analysis.NGramAnalyzer;
import drakkar.mast.retrieval.analysis.NGramAnalyzerCaseSensitive;
import drakkar.mast.retrieval.analysis.NGramQuery;
import drakkar.mast.retrieval.analysis.StopStemAnalyzer;
import drakkar.mast.retrieval.analysis.StopStemAnalyzerCaseSensitive;
import drakkar.mast.retrieval.analysis.WikiAnalyzer;
import drakkar.mast.retrieval.analysis.WikiCaseSensitiveAnalyzer;
import drakkar.mast.retrieval.parser.JavaParser;
import drakkar.mast.retrieval.parser.PdfParser;
import com.thoughtworks.qdox.parser.ParseException;
import java.io.File;
import java.io.IOException;
import java.text.DateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryTermScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Context for the Apache Lucene search engine.
 */
public class LuceneContext extends AdvEngineContext {

    // Searcher built over the current reader; re-created by the search methods before a query is run.
    private IndexSearcher searcher;
    // Not referenced in the visible part of this class; presumably selects append vs. rebuild when indexing (TODO confirm).
    private boolean appendIndex;
    // Not referenced in the visible part of this class; presumably the writer for the main index (TODO confirm).
    private IndexWriter writer;
    // Not referenced in the visible part of this class; presumably the writer for the LSI index (TODO confirm).
    private IndexWriter writerLSI;
    // Presumably backs getFieldAnalyzer()/setFieldAnalyzer(), used by the case-insensitive searches (accessors are outside this view; confirm).
    private PerFieldAnalyzerWrapper fieldAnalyzer;
    // Presumably backs getFieldAnalyzerCS()/setFieldAnalyzerCS(), used by the case-sensitive searches (accessors are outside this view; confirm).
    private PerFieldAnalyzerWrapper fieldAnalyzerCS;
    // Hits of the most recent query (taken from topDocs.scoreDocs).
    private ScoreDoc[] scoreDoc;
    // Not referenced in the visible part of this class.
    private ScoreDoc scoreDocObj;
    // Query built for (and executed by) the most recent search.
    private Query queryq;
    // Per-field queries that search(String, boolean) combines into queryq.
    private Query[] queryall;
    // Directory opened from indexPath by the search methods.
    private Directory directory;
    // Reader opened over the directory by the search methods.
    private IndexReader reader;
    // Raw result of the most recent searcher.search(...) call.
    private TopDocs topDocs;
    // Not referenced in the visible part of this class; presumably used to highlight matches (TODO confirm).
    private Highlighter hg;
    // Not referenced in the visible part of this class.
    private Document docum = null;
    // Not referenced in the visible part of this class.
    private TokenStream tokens;

    /**
     * Creates a context that points at the default Lucene index locations
     * and has LSI switched off.
     */
    public LuceneContext() {
        this.applyLSI = false;
        this.defaultIndexLSIPath = "./index/lsi/lucene/";
        this.defaultIndexPath = "./index/lucene/";
    }

    /**
     * Creates a context that points at the default Lucene index locations,
     * has LSI switched off, and hands the given listener to the superclass.
     *
     * @param listener listener for the processes carried out by this engine
     */
    public LuceneContext(FacadeDesktopListener listener) {
        super(listener);
        this.applyLSI = false;
        this.defaultIndexLSIPath = "./index/lsi/lucene/";
        this.defaultIndexPath = "./index/lucene/";
    }

    /**
     * {@inheritDoc}
     * <p>
     * Builds one {@link NGramQuery} per searchable field (code comments, code
     * source, document text, name and book), combines them into a single
     * query and asks the searcher for at most 1000 hits. The hits are turned
     * into meta data with {@code saveResults} and duplicates are removed.
     *
     * @param query         text to search for
     * @param caseSensitive {@code true} to use the case-sensitive fields and analyzers
     * @return the matching documents; an empty list (never {@code null}) when
     *         no index exists at {@code indexPath}
     * @throws SearchException if the index cannot be opened or searched
     */
    @Override
    public ArrayList<DocumentMetaData> search(String query, boolean caseSensitive) throws SearchException {
        this.finalMetaResult = new ArrayList<DocumentMetaData>();
        // Start from the (empty) result list so a missing index yields "no
        // results" instead of a NullPointerException further down.
        ArrayList<DocumentMetaData> finalResultsList = this.finalMetaResult;
        this.scoreDoc = null;
        this.queryq = null;
        this.queryall = new Query[5];

        try {
            setStartTimeOfSearch(new Date());

            // Open the directory once and reuse it for the existence check and the reader.
            Directory indexDirectory = FSDirectory.open(this.indexPath);
            if (!IndexReader.indexExists(indexDirectory)) {
                OutputMonitor.printLine("Index path incorrect", OutputMonitor.ERROR_MESSAGE);
                this.notifyTaskProgress(ERROR_MESSAGE, "Index path incorrect");
                return finalResultsList;
            }

            // NOTE(review): reader and searcher are kept in fields and are not
            // closed here; confirm who is responsible for releasing them.
            this.directory = indexDirectory;
            this.reader = IndexReader.open(this.directory);
            this.searcher = new IndexSearcher(this.reader);

            try {
                if (!caseSensitive) {
                    String[] codeAndBooks = {
                        getDocumentField(FIELD_CODE_ALL_COMMENTS),
                        getDocumentField(FIELD_CODE_ALL_SOURCE),
                        getDocumentField(FIELD_DOC_TEXT),
                        getDocumentField(FIELD_NAME),
                        getDocumentField(FIELD_DOC_BOOK)
                    };

                    // Stop/stem analysis by default, n-grams for source code and names.
                    this.setFieldAnalyzer(new PerFieldAnalyzerWrapper(new StopStemAnalyzer()));
                    this.getFieldAnalyzer().addAnalyzer(getDocumentField(FIELD_CODE_ALL_SOURCE),
                            new NGramAnalyzer());
                    this.getFieldAnalyzer().addAnalyzer(getDocumentField(FIELD_NAME), new NGramAnalyzer());

                    for (int i = 0; i < codeAndBooks.length; i++) {
                        this.queryall[i] = new NGramQuery(this.getFieldAnalyzer(), query, codeAndBooks[i]);
                    }
                } else {
                    String[] codeAndBooks = {
                        getDocumentFieldCS(FIELD_CODE_ALL_COMMENTS),
                        getDocumentFieldCS(FIELD_CODE_ALL_SOURCE),
                        getDocumentFieldCS(FIELD_DOC_TEXT),
                        getDocumentFieldCS(FIELD_NAME),
                        getDocumentFieldCS(FIELD_DOC_BOOK)
                    };

                    this.setFieldAnalyzerCS(new PerFieldAnalyzerWrapper(new StopStemAnalyzerCaseSensitive()));
                    this.getFieldAnalyzerCS().addAnalyzer(getDocumentFieldCS(FIELD_CODE_ALL_SOURCE),
                            new NGramAnalyzerCaseSensitive());
                    this.getFieldAnalyzerCS().addAnalyzer(getDocumentFieldCS(FIELD_NAME),
                            new NGramAnalyzerCaseSensitive());

                    for (int i = 0; i < codeAndBooks.length; i++) {
                        this.queryall[i] = new NGramQuery(this.getFieldAnalyzerCS(), query, codeAndBooks[i]);
                    }
                }

                // Combine once, after every per-field query exists. The receiver is
                // the last per-field query, exactly as in the final loop iteration of
                // the previous implementation.
                this.queryq = this.queryall[this.queryall.length - 1].combine(this.queryall);

                // Finds the top n hits for query, applying filter if non-null.
                this.topDocs = this.searcher.search(this.queryq, null, 1000);
                int totalHits = this.topDocs.totalHits;
                // NOTE(review): the counter is set to totalHits here and increased
                // again by the result size before returning; confirm this is intended.
                this.retrievedDocsCount = totalHits;
                this.scoreDoc = this.topDocs.scoreDocs;
                setEndTimeOfSearch(new Date());
                String message = "Lucene retrieved " + totalHits + " document(s) (in " + getSearchTime()
                        + " milliseconds) that matched query '" + query + "'.";
                OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
                this.notifyTaskProgress(INFORMATION_MESSAGE, message);

                // Convert the hits and drop repeated documents.
                this.finalMetaResult = saveResults(this.scoreDoc, caseSensitive, this.queryq);
                if (this.finalMetaResult.size() > 1) {
                    deleteRepeated(this.finalMetaResult);
                }
                finalResultsList = this.finalMetaResult;

            } catch (IOException ex) {
                String message = "Class: SearchEngineLucene." + " Method: searchFiles(String query)."
                        + "  Error: " + ex.getMessage();
                this.notifyTaskProgress(ERROR_MESSAGE, message);
                throw new SearchException(ex.getMessage());
            }

        } catch (CorruptIndexException ex) {
            this.notifyTaskProgress(ERROR_MESSAGE, ex.getMessage());
            throw new SearchException(ex.getMessage());
        } catch (IOException ex) {
            this.notifyTaskProgress(ERROR_MESSAGE, ex.getMessage());
            throw new SearchException(ex.getMessage());
        }

        this.retrievedDocsCount += finalResultsList.size();
        return finalResultsList;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Searches the whole collection with {@code search(query, caseSensitive)}
     * and keeps only the documents accepted by {@code filterMetaDocuments}
     * for the given document type.
     *
     * @param query         text to search for
     * @param docType       document type used to filter the results
     * @param caseSensitive {@code true} for a case-sensitive search
     * @return the filtered documents; an empty list (never {@code null}) when
     *         no index exists at {@code indexPath}
     * @throws SearchException if the index cannot be opened or searched
     */
    @Override
    public ArrayList<DocumentMetaData> search(String query, String docType, boolean caseSensitive)
            throws SearchException {
        // Empty by default so a missing index does not end in a NullPointerException.
        ArrayList<DocumentMetaData> finalResultsList = new ArrayList<DocumentMetaData>();

        try {
            setStartTimeOfSearch(new Date());

            if (IndexReader.indexExists(FSDirectory.open(this.indexPath))) {
                // The delegated search opens the directory, reader and searcher
                // itself; opening them here as well only leaked an extra reader.
                ArrayList<DocumentMetaData> tempList = search(query, caseSensitive); // searches the whole collection
                finalResultsList = this.filterMetaDocuments(docType, tempList);

                this.finalMetaResult = finalResultsList;
                setEndTimeOfSearch(new Date());
                String message = "Lucene retrieved " + finalResultsList.size() + " document(s) (in "
                        + getSearchTime() + " milliseconds) that matched query '" + query + "'. for doctype "
                        + docType;
                OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
                this.notifyTaskProgress(INFORMATION_MESSAGE, message);

            } else {
                OutputMonitor.printLine("Index path incorrect", OutputMonitor.ERROR_MESSAGE);
                this.notifyTaskProgress(ERROR_MESSAGE, "Index path incorrect");
            }

        } catch (CorruptIndexException ex) {
            OutputMonitor.printStream("", ex);
            this.notifyTaskProgress(ERROR_MESSAGE, ex.getMessage());
            throw new SearchException(ex.getMessage());
        } catch (IOException ex) {
            OutputMonitor.printStream("", ex);
            this.notifyTaskProgress(ERROR_MESSAGE, ex.getMessage());
            throw new SearchException(ex.getMessage());
        }

        this.retrievedDocsCount += finalResultsList.size();
        return finalResultsList;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Runs {@code search(query, docType, caseSensitive)} once per document
     * type, merges the results and removes repeated documents.
     *
     * @param query         text to search for
     * @param docTypes      document types to search; {@code null} is treated as no types
     * @param caseSensitive {@code true} for a case-sensitive search
     * @return the merged documents, never {@code null}
     * @throws SearchException if any of the delegated searches fails
     */
    @Override
    public ArrayList<DocumentMetaData> search(String query, String[] docTypes, boolean caseSensitive)
            throws SearchException {
        // The accumulator must exist before addAll(); it used to be null,
        // which made every call fail with a NullPointerException.
        ArrayList<DocumentMetaData> finalResultsList = new ArrayList<DocumentMetaData>();

        setStartTimeOfSearch(new Date());

        if (docTypes != null) {
            for (String docType : docTypes) {
                finalResultsList.addAll(search(query, docType, caseSensitive));
            }
        }

        if (finalResultsList.size() > 1) {
            // De-duplicate the merged list (not the field left over from the last delegated search).
            deleteRepeated(finalResultsList);
        }

        // Record the end time before it is reported in the message.
        setEndTimeOfSearch(new Date());
        String message = "Lucene retrieved " + finalResultsList.size() + " document(s) (in " + getSearchTime()
                + " milliseconds) that matched query '" + query + "'. for doctypes ";
        OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
        this.notifyTaskProgress(INFORMATION_MESSAGE, message);

        this.retrievedDocsCount += finalResultsList.size();
        return finalResultsList;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Builds a single {@link NGramQuery} for the given field and asks the
     * searcher for at most 1000 hits. The hits are turned into meta data
     * with {@code saveResults} and duplicates are removed.
     *
     * @param query         text to search for
     * @param field         identifier of the field to search, resolved with
     *                      {@code getDocumentField} / {@code getDocumentFieldCS}
     * @param caseSensitive {@code true} to use the case-sensitive field and analyzers
     * @return the matching documents; an empty list (never {@code null}) when
     *         no index exists at {@code indexPath}
     * @throws SearchException if the index cannot be opened or searched
     */
    @Override
    public ArrayList<DocumentMetaData> search(String query, int field, boolean caseSensitive)
            throws SearchException {
        // Empty by default so a missing index does not end in a NullPointerException.
        ArrayList<DocumentMetaData> finalResultsList = new ArrayList<DocumentMetaData>();

        try {
            // Open the directory once and reuse it for the existence check and the reader.
            Directory indexDirectory = FSDirectory.open(this.indexPath);
            if (!IndexReader.indexExists(indexDirectory)) {
                OutputMonitor.printLine("Index path incorrect", OutputMonitor.ERROR_MESSAGE);
                this.notifyTaskProgress(ERROR_MESSAGE, "Index path incorrect");
                return finalResultsList;
            }

            // NOTE(review): reader and searcher are kept in fields and are not
            // closed here; confirm who is responsible for releasing them.
            this.directory = indexDirectory;
            this.reader = IndexReader.open(this.directory);
            this.searcher = new IndexSearcher(this.reader);

            this.finalMetaResult = new ArrayList<DocumentMetaData>();
            this.queryq = null;
            setStartTimeOfSearch(new Date());
            try {
                if (!caseSensitive) {
                    // Stop/stem analysis by default, n-grams for source code and names.
                    this.setFieldAnalyzer(new PerFieldAnalyzerWrapper(new StopStemAnalyzer()));
                    this.getFieldAnalyzer().addAnalyzer(getDocumentField(FIELD_CODE_ALL_SOURCE),
                            new NGramAnalyzer());
                    this.getFieldAnalyzer().addAnalyzer(getDocumentField(FIELD_NAME), new NGramAnalyzer());

                    String fieldToProcess = getDocumentField(field);
                    this.queryq = new NGramQuery(this.getFieldAnalyzer(), query, fieldToProcess);
                } else {
                    this.setFieldAnalyzerCS(new PerFieldAnalyzerWrapper(new StopStemAnalyzerCaseSensitive()));
                    this.getFieldAnalyzerCS().addAnalyzer(getDocumentFieldCS(FIELD_CODE_ALL_SOURCE),
                            new NGramAnalyzerCaseSensitive());
                    this.getFieldAnalyzerCS().addAnalyzer(getDocumentFieldCS(FIELD_NAME),
                            new NGramAnalyzerCaseSensitive());

                    String fieldToProcess = getDocumentFieldCS(field);
                    this.queryq = new NGramQuery(this.getFieldAnalyzerCS(), query, fieldToProcess);
                }

                // Finds the top n hits for query, applying filter if non-null.
                this.topDocs = this.searcher.search(this.queryq, null, 1000);
                int totalHits = this.topDocs.totalHits;
                // NOTE(review): the counter is set to totalHits here and increased
                // again by the result size before returning; confirm this is intended.
                this.retrievedDocsCount = totalHits;
                this.scoreDoc = this.topDocs.scoreDocs;
                setEndTimeOfSearch(new Date());
                String message = "Lucene retrieved " + totalHits + " document(s) (in " + getSearchTime()
                        + " milliseconds) that matched query '" + query + "'.";
                OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
                this.notifyTaskProgress(INFORMATION_MESSAGE, message);

                // Convert the hits and drop repeated documents.
                this.finalMetaResult = saveResults(this.scoreDoc, caseSensitive, this.queryq);
                if (this.finalMetaResult.size() > 1) {
                    deleteRepeated(this.finalMetaResult);
                }
                finalResultsList = this.finalMetaResult;

            } catch (IOException ex) {
                String message = "Class: SearchEngineLucene." + " Method: searchFiles(String query)."
                        + "  Error: " + ex.getMessage();
                OutputMonitor.printStream(message, ex);
                this.notifyTaskProgress(ERROR_MESSAGE, message);
                throw new SearchException(ex.getMessage());
            }

        } catch (CorruptIndexException ex) {
            OutputMonitor.printStream("", ex);
            this.notifyTaskProgress(ERROR_MESSAGE, ex.getMessage());
            throw new SearchException(ex.getMessage());
        } catch (IOException ex) {
            OutputMonitor.printStream("", ex);
            this.notifyTaskProgress(ERROR_MESSAGE, ex.getMessage());
            throw new SearchException(ex.getMessage());
        }

        this.retrievedDocsCount += finalResultsList.size();
        return finalResultsList;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Delegates to {@code search(query, field, caseSensitive)} for each
     * entry of {@code fields}, concatenates the results and removes repeated
     * documents when more than one was found.
     *
     * @param query         text to search for
     * @param fields        identifiers of the fields to search
     * @param caseSensitive {@code true} for a case-sensitive search
     * @return the merged documents
     * @throws SearchException if any of the delegated searches fails
     */
    @Override
    public ArrayList<DocumentMetaData> search(String query, int[] fields, boolean caseSensitive)
            throws SearchException {
        final ArrayList<DocumentMetaData> merged = new ArrayList<DocumentMetaData>();
        setStartTimeOfSearch(new Date());

        for (int currentField : fields) {
            merged.addAll(this.search(query, currentField, caseSensitive));
        }

        if (merged.size() > 1) {
            deleteRepeated(merged);
        }
        setEndTimeOfSearch(new Date());

        final String summary = "Lucene retrieved " + merged.size() + " document(s) (in " + getSearchTime()
                + " milliseconds) that matched query '" + query + "'. for fields ";
        OutputMonitor.printLine(summary, OutputMonitor.INFORMATION_MESSAGE);
        this.notifyTaskProgress(INFORMATION_MESSAGE, summary);

        this.retrievedDocsCount += merged.size();
        return merged;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Searches the given field with {@code search(query, field, caseSensitive)}
     * and keeps only the documents accepted by {@code filterMetaDocuments}
     * for the given document type.
     * <p>
     * An {@link IOException} raised while checking the index is logged and
     * reported to the listener, and an empty list is returned.
     *
     * @param query         text to search for
     * @param docType       document type used to filter the results
     * @param field         identifier of the field to search
     * @param caseSensitive {@code true} for a case-sensitive search
     * @return the filtered documents; an empty list when the index is missing
     *         or cannot be checked
     * @throws SearchException if the delegated search fails
     */
    @Override
    public ArrayList<DocumentMetaData> search(String query, String docType, int field, boolean caseSensitive)
            throws SearchException {
        ArrayList<DocumentMetaData> finalResultsList = new ArrayList<DocumentMetaData>();
        setStartTimeOfSearch(new Date());
        try {
            if (IndexReader.indexExists(FSDirectory.open(this.indexPath))) {
                // The delegated search opens the directory, reader and searcher
                // itself; opening them here as well only leaked an extra reader.
                ArrayList<DocumentMetaData> docsResult = search(query, field, caseSensitive);

                finalResultsList = this.filterMetaDocuments(docType, docsResult);
                this.finalMetaResult = finalResultsList;
                setEndTimeOfSearch(new Date());
                String message = "Lucene retrieved " + finalResultsList.size() + " document(s) (in "
                        + getSearchTime() + " milliseconds) that matched query '" + query
                        + "'for field and docType";
                OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
                this.notifyTaskProgress(INFORMATION_MESSAGE, message);
                this.retrievedDocsCount += finalResultsList.size();
            } else {
                // Logged as an error, like the same condition in the other overloads.
                OutputMonitor.printLine("Index path incorrect", OutputMonitor.ERROR_MESSAGE);
                this.notifyTaskProgress(ERROR_MESSAGE, "Index path incorrect");
            }
        } catch (IOException ex) {
            // Best effort: report the failure and fall through to the empty result.
            OutputMonitor.printStream("", ex);
            this.notifyTaskProgress(ERROR_MESSAGE, ex.getMessage());
        }

        return finalResultsList;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Walks the entries of {@code documentalSource}:
     * <ul>
     * <li>for an entry equal to {@code docType} (ignoring case) with at least
     * one field given, every non-zero field is searched with
     * {@code search(query, docType, field, caseSensitive)} and the results
     * are accumulated and de-duplicated;</li>
     * <li>for such an entry without fields, the result of
     * {@code search(query, docType, caseSensitive)} is returned at once;</li>
     * <li>for any other entry, when {@code docType} is {@code null}, the
     * result of {@code search(query, caseSensitive)} is returned at once.</li>
     * </ul>
     *
     * @param query         text to search for
     * @param docType       document type to search
     * @param fields        identifiers of the fields to search; may be {@code null} or empty
     * @param caseSensitive {@code true} for a case-sensitive search
     * @return the documents found
     * @throws SearchException if the index cannot be opened or a delegated search fails
     */
    @Override
    public ArrayList<DocumentMetaData> search(String query, String docType, int[] fields, boolean caseSensitive)
            throws SearchException {
        final ArrayList<DocumentMetaData> collected = new ArrayList<DocumentMetaData>();

        try {
            setStartTimeOfSearch(new Date());

            if (!IndexReader.indexExists(FSDirectory.open(this.indexPath))) {
                OutputMonitor.printLine("Index path incorrect", OutputMonitor.ERROR_MESSAGE);
                this.notifyTaskProgress(ERROR_MESSAGE, "Index path incorrect");
            } else {
                this.directory = FSDirectory.open(this.indexPath);
                this.reader = IndexReader.open(this.directory);
                this.searcher = new IndexSearcher(this.reader);

                final boolean hasFields = fields != null && fields.length > 0;

                for (int idx = 0; idx < this.documentalSource.size(); idx++) {
                    final String source = this.documentalSource.get(idx);

                    if (!source.equalsIgnoreCase(docType)) {
                        if (docType == null) {
                            // No type requested: search the whole collection.
                            ArrayList<DocumentMetaData> everything = search(query, caseSensitive);
                            this.retrievedDocsCount += everything.size();
                            return everything;
                        }
                        continue;
                    }

                    if (!hasFields) {
                        // Type matches but no fields requested: search by type only.
                        ArrayList<DocumentMetaData> byType = search(query, docType, caseSensitive);
                        this.retrievedDocsCount += byType.size();
                        return byType;
                    }

                    for (int field : fields) {
                        if (field == 0) {
                            continue;
                        }
                        ArrayList<DocumentMetaData> partial = search(query, docType, field, caseSensitive);
                        if (partial != null) {
                            collected.addAll(partial);
                        }
                    }
                    // Remove the documents found through more than one field.
                    this.deleteRepeated(collected);
                }
            }

        } catch (CorruptIndexException ex) {
            OutputMonitor.printStream("", ex);
            this.notifyTaskProgress(ERROR_MESSAGE, ex.getMessage());
            throw new SearchException(ex.getMessage());

        } catch (IOException ex) {
            OutputMonitor.printStream("", ex);
            this.notifyTaskProgress(ERROR_MESSAGE, ex.getMessage());
            throw new SearchException(ex.getMessage());
        }

        setEndTimeOfSearch(new Date());
        final String summary = "Lucene retrieved " + collected.size() + " document(s) (in " + getSearchTime()
                + " milliseconds) that matched query '" + query + "'for fields and doctype";
        OutputMonitor.printLine(summary, OutputMonitor.INFORMATION_MESSAGE);
        this.notifyTaskProgress(INFORMATION_MESSAGE, summary);

        this.retrievedDocsCount += collected.size();
        return collected;
    }

    /**
     * {@inheritDoc}
     * <p>
     * For each document type, the type {@code "documents"} is searched with
     * {@code search(query, docType, caseSensitive)} and every other type with
     * {@code search(query, docType, field, caseSensitive)}. The results are
     * merged and repeated documents are removed.
     *
     * @param query         text to search for
     * @param docTypes      document types to search
     * @param field         identifier of the field to search
     * @param caseSensitive {@code true} for a case-sensitive search
     * @return the merged documents
     * @throws SearchException if any of the delegated searches fails
     */
    @Override
    public ArrayList<DocumentMetaData> search(String query, String[] docTypes, int field, boolean caseSensitive)
            throws SearchException {
        ArrayList<DocumentMetaData> documents = new ArrayList<DocumentMetaData>();
        setStartTimeOfSearch(new Date());

        for (String doc : docTypes) {
            ArrayList<DocumentMetaData> tempList;
            if (doc.equals("documents")) {
                tempList = search(query, doc, caseSensitive);
            } else {
                tempList = this.search(query, doc, field, caseSensitive);
            }
            documents.addAll(tempList);
        }

        if (documents.size() > 1) {
            deleteRepeated(documents);
        }

        // Record the end time before it is reported in the message.
        setEndTimeOfSearch(new Date());
        String message = "Lucene retrieved " + documents.size() + " document(s) (in " + getSearchTime()
                + " milliseconds) that matched query '" + query + "'. for doctypes and field ";
        OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
        this.notifyTaskProgress(INFORMATION_MESSAGE, message);

        this.retrievedDocsCount += documents.size();
        return documents;
    }

    /**
     * {@inheritDoc}
     * <p>
     * For each document type, the type {@code "documents"} is searched with
     * {@code search(query, docType, caseSensitive)} and every other type with
     * {@code search(query, docType, fields, caseSensitive)}. The results are
     * merged and repeated documents are removed.
     *
     * @param query         text to search for
     * @param docTypes      document types to search
     * @param fields        identifiers of the fields to search
     * @param caseSensitive {@code true} for a case-sensitive search
     * @return the merged documents
     * @throws SearchException if any of the delegated searches fails
     */
    @Override
    public ArrayList<DocumentMetaData> search(String query, String[] docTypes, int[] fields, boolean caseSensitive)
            throws SearchException {
        ArrayList<DocumentMetaData> documents = new ArrayList<DocumentMetaData>();
        setStartTimeOfSearch(new Date());

        for (String doc : docTypes) {
            ArrayList<DocumentMetaData> tempList;
            if (doc.equals("documents")) {
                tempList = search(query, doc, caseSensitive);
            } else {
                tempList = this.search(query, doc, fields, caseSensitive);
            }
            documents.addAll(tempList);
        }

        if (documents.size() > 1) {
            deleteRepeated(documents);
        }

        // Record the end time before it is reported in the message.
        setEndTimeOfSearch(new Date());
        String message = "Lucene retrieved " + documents.size() + " document(s) (in " + getSearchTime()
                + " milliseconds) that matched query '" + query + "'. for doctypes and fields ";
        OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
        this.notifyTaskProgress(INFORMATION_MESSAGE, message);

        this.retrievedDocsCount += documents.size();
        return documents;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Uses the default index path, the default LSI index path (only when LSI
     * is enabled) and the default collection path, then builds the index
     * with {@code build(MAKE_INDEX)}.
     *
     * @return the value returned by {@code build}, which is also added to
     *         {@code indexedDocsCount}
     * @throws IndexException if the default collection path is not a
     *                        directory with at least one entry
     */
    @Override
    public long makeIndex() throws IndexException {
        this.indexPath = new File(this.defaultIndexPath);
        if (applyLSI) {
            this.indexLSIPath = new File(this.defaultIndexLSIPath);
        }
        this.collectionPath = new File(this.defaultCollectionPath);

        // listFiles() yields null for a missing path and for a path that is not
        // a directory, so one null check covers both without risking an NPE.
        File[] entries = this.collectionPath.listFiles();
        if (entries == null || entries.length == 0) {
            String message = collectionPath + " does not exist or is empty";
            OutputMonitor.printLine(message, OutputMonitor.ERROR_MESSAGE);
            this.notifyTaskProgress(ERROR_MESSAGE, message);
            throw new IndexException(message);
        }

        long indexedFiles = this.build(MAKE_INDEX);
        this.indexedDocsCount += indexedFiles;

        return indexedFiles;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Uses the default index path and the default LSI index path (only when
     * LSI is enabled), then builds the index for the given collection with
     * {@code build(MAKE_INDEX)}.
     *
     * @param collectionPath directory holding the documents to index
     * @return the value returned by {@code build}, which is also added to
     *         {@code indexedDocsCount}
     * @throws IndexException if {@code collectionPath} is {@code null} or is
     *                        not a directory with at least one entry
     */
    @Override
    public long makeIndex(File collectionPath) throws IndexException {
        this.indexPath = new File(this.defaultIndexPath);
        if (applyLSI) {
            this.indexLSIPath = new File(this.defaultIndexLSIPath);
        }
        this.collectionPath = collectionPath;

        // listFiles() yields null for a missing path and for a path that is not
        // a directory; a null argument is rejected the same way.
        File[] entries = (collectionPath == null) ? null : collectionPath.listFiles();
        if (entries == null || entries.length == 0) {
            String message = collectionPath + " does not exist or is empty";
            OutputMonitor.printLine(message, OutputMonitor.ERROR_MESSAGE);
            this.notifyTaskProgress(ERROR_MESSAGE, message);
            throw new IndexException(message);
        }

        long indexedFiles = this.build(MAKE_INDEX);
        this.indexedDocsCount += indexedFiles;

        return indexedFiles;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Uses the default index path and the default LSI index path, then
     * builds the index for the given files with
     * {@code build(collectionPath, MAKE_INDEX)}.
     *
     * @param collectionPath files to index
     * @return the value returned by {@code build}, which is also added to
     *         {@code indexedDocsCount}
     * @throws IndexException if {@code collectionPath} is {@code null} or empty
     */
    @Override
    public long makeIndex(List<File> collectionPath) throws IndexException {
        this.indexPath = new File(this.defaultIndexPath);
        // NOTE(review): the other makeIndex overloads set the LSI path only when
        // applyLSI is true; this one sets it unconditionally. Confirm which is intended.
        this.indexLSIPath = new File(this.defaultIndexLSIPath);

        // A null list is rejected like an empty one instead of failing with an NPE.
        if (collectionPath == null || collectionPath.isEmpty()) {
            OutputMonitor.printLine("The collection does not have files", OutputMonitor.ERROR_MESSAGE);
            this.notifyTaskProgress(ERROR_MESSAGE, "The collection does not have files");
            throw new IndexException("The collection does not have files");
        }

        long indexedFiles = this.build(collectionPath, MAKE_INDEX);
        this.indexedDocsCount += indexedFiles;

        return indexedFiles;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Uses the given index path and the default LSI index path (only when
     * LSI is enabled), then builds the index for the given collection with
     * {@code build(MAKE_INDEX)}.
     *
     * @param collectionPath directory holding the documents to index
     * @param indexPath      location of the index
     * @return the value returned by {@code build}, which is also added to
     *         {@code indexedDocsCount}
     * @throws IndexException if {@code collectionPath} is {@code null} or is
     *                        not a directory with at least one entry, or if
     *                        {@code indexPath} is {@code null}
     */
    @Override
    public long makeIndex(File collectionPath, File indexPath) throws IndexException {
        this.indexPath = indexPath;
        if (applyLSI) {
            this.indexLSIPath = new File(this.defaultIndexLSIPath);
        }
        this.collectionPath = collectionPath;

        // listFiles() yields null for a missing path and for a path that is not
        // a directory; a null argument is rejected the same way.
        File[] entries = (collectionPath == null) ? null : collectionPath.listFiles();
        if (entries == null || entries.length == 0) {
            String message = collectionPath + " does not exist or is empty";
            OutputMonitor.printLine(message, OutputMonitor.ERROR_MESSAGE);
            this.notifyTaskProgress(ERROR_MESSAGE, message);
            throw new IndexException(message);
        }
        if (indexPath == null) {
            String message = "indexPath is null";
            OutputMonitor.printLine(message, OutputMonitor.ERROR_MESSAGE);
            this.notifyTaskProgress(ERROR_MESSAGE, message);
            throw new IndexException(message);
        }

        long indexedFiles = this.build(MAKE_INDEX);
        this.indexedDocsCount += indexedFiles;

        return indexedFiles;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Uses the given index path and the default LSI index path (only when
     * LSI is enabled), then builds the index for the given files with
     * {@code build(collectionPath, MAKE_INDEX)}.
     *
     * @param collectionPath files to index
     * @param indexPath      location of the index
     * @return the value returned by {@code build}, which is also added to
     *         {@code indexedDocsCount}
     * @throws IndexException if {@code collectionPath} is {@code null} or
     *                        empty, or if {@code indexPath} is {@code null}
     */
    @Override
    public long makeIndex(List<File> collectionPath, File indexPath) throws IndexException {
        this.indexPath = indexPath;
        if (applyLSI) {
            this.indexLSIPath = new File(this.defaultIndexLSIPath);
        }

        // A null list is rejected like an empty one instead of failing with an NPE.
        if (collectionPath == null || collectionPath.isEmpty()) {
            OutputMonitor.printLine("The collection does not have files", OutputMonitor.ERROR_MESSAGE);
            this.notifyTaskProgress(ERROR_MESSAGE, "The collection does not have files");
            throw new IndexException("The collection does not have files");
        }
        if (this.indexPath == null) {
            String message = "indexPath is null";
            OutputMonitor.printLine(message, OutputMonitor.ERROR_MESSAGE);
            this.notifyTaskProgress(ERROR_MESSAGE, message);
            throw new IndexException(message);
        }

        long indexedFiles = this.build(collectionPath, MAKE_INDEX);
        this.indexedDocsCount += indexedFiles;

        return indexedFiles;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Uses the default index path and adds the given collection to the index
     * with {@code build(ADD_INDEX)}.
     *
     * @param collectionPath directory holding the documents to add
     * @return the value returned by {@code build}, which is also added to
     *         {@code indexedDocsCount}
     * @throws IndexException if {@code collectionPath} is {@code null} or is
     *                        not a directory with at least one entry
     */
    @Override
    public long updateIndex(File collectionPath) throws IndexException {
        this.indexPath = new File(this.defaultIndexPath);
        this.collectionPath = collectionPath;

        // listFiles() yields null for a missing path and for a path that is not
        // a directory; a null argument is rejected the same way.
        File[] entries = (collectionPath == null) ? null : collectionPath.listFiles();
        if (entries == null || entries.length == 0) {
            String message = collectionPath + " does not exist or is empty";
            OutputMonitor.printLine(message, OutputMonitor.ERROR_MESSAGE);
            this.notifyTaskProgress(ERROR_MESSAGE, message);
            throw new IndexException(message);
        }

        long indexedFiles = this.build(ADD_INDEX);
        this.indexedDocsCount += indexedFiles;

        return indexedFiles;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Adds the given files to the index stored at the default index location
     * using the {@code ADD_INDEX} operation, and adds the number of indexed
     * files to the running document counter.
     *
     * @throws IndexException if the collection is empty or the build fails
     */
    @Override
    public long updateIndex(List<File> collectionPath) throws IndexException {
        this.indexPath = new File(this.defaultIndexPath);

        // Nothing to add: report it on every channel and abort.
        if (collectionPath.isEmpty()) {
            final String reason = "The collection does not have files";
            OutputMonitor.printLine(reason, OutputMonitor.ERROR_MESSAGE);
            this.notifyTaskProgress(ERROR_MESSAGE, reason);
            throw new IndexException(reason);
        }

        long added = 0;
        if (this.indexPath != null) {
            added = this.build(collectionPath, ADD_INDEX);
        }

        this.indexedDocsCount += added;
        return added;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Adds the contents of {@code collectionPath} to the index stored at
     * {@code indexPath} using the {@code ADD_INDEX} operation, and adds the
     * number of indexed files to the running document counter.
     *
     * @throws IndexException if {@code collectionPath} does not exist, cannot
     *         be listed as a directory, or contains no entries; if
     *         {@code indexPath} is {@code null}; or if the build itself fails
     */
    @Override
    public long updateIndex(File collectionPath, File indexPath) throws IndexException {
        this.indexPath = indexPath;
        this.collectionPath = collectionPath;

        // File.listFiles() returns null when the path is not a directory or an
        // I/O error occurs; treat that like an empty collection instead of
        // failing with a NullPointerException.
        final File[] entries = (collectionPath != null && collectionPath.exists())
                ? collectionPath.listFiles()
                : null;

        // The collection check takes precedence over the index-path check.
        final String failure;
        if (entries == null || entries.length == 0) {
            failure = collectionPath + " does not exist or is empty";
        } else if (indexPath == null) {
            failure = "IndexPath is null";
        } else {
            failure = null;
        }

        if (failure != null) {
            OutputMonitor.printLine(failure, OutputMonitor.ERROR_MESSAGE);
            this.notifyTaskProgress(ERROR_MESSAGE, failure);
            throw new IndexException(failure);
        }

        final long indexedFiles = this.build(ADD_INDEX);
        this.indexedDocsCount += indexedFiles;
        return indexedFiles;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Adds the given files to the index stored at {@code indexPath} using the
     * {@code ADD_INDEX} operation, and adds the number of indexed files to the
     * running document counter.
     *
     * @throws IndexException if the collection is empty, if {@code indexPath}
     *         is {@code null}, or if the build itself fails
     */
    @Override
    public long updateIndex(List<File> collectionPath, File indexPath) throws IndexException {
        this.indexPath = indexPath;

        // An empty collection is reported before a missing index path.
        String problem = null;
        if (collectionPath.isEmpty()) {
            problem = "The collection does not have files";
        } else if (this.indexPath == null) {
            problem = "indexPath is null";
        }

        if (problem != null) {
            OutputMonitor.printLine(problem, OutputMonitor.ERROR_MESSAGE);
            this.notifyTaskProgress(ERROR_MESSAGE, problem);
            throw new IndexException(problem);
        }

        final long added = this.build(collectionPath, ADD_INDEX);
        this.indexedDocsCount += added;
        return added;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Opens the index stored at the default index location just long enough
     * to read its document count, reports that count to the listeners, makes
     * the default location the active index path and starts the LSI
     * initialisation.
     *
     * @return {@code true} when the index was found and read
     * @throws IndexException if no index exists at the default location or the
     *         index cannot be read
     */
    @Override
    public boolean loadIndex() throws IndexException {
        this.reader = null;
        File defaultFile = new File(this.defaultIndexPath);
        if (applyLSI) {
            this.indexLSIPath = new File(this.defaultIndexLSIPath);
        }
        String message;
        try {
            // isDirectory() already implies that the path exists, and a File
            // created with "new" can never be null.
            if (!defaultFile.isDirectory() || !IndexReader.indexExists(FSDirectory.open(defaultFile))) {
                message = "Not found index in default index path";
                OutputMonitor.printLine(message, OutputMonitor.ERROR_MESSAGE);
                throw new IndexException(message);
            }

            this.reader = IndexReader.open(FSDirectory.open(defaultFile));
            int cant;
            try {
                cant = this.reader.numDocs();
            } finally {
                // Release the reader even if counting the documents fails.
                this.reader.close();
            }

            message = "Loading Lucene...";
            OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
            this.notifyTaskProgress(INFORMATION_MESSAGE, message);
            try {
                // NOTE(review): fixed two-second pause kept from the original; its purpose is not visible here.
                Thread.sleep(2000);
            } catch (InterruptedException ex) {
                OutputMonitor.printStream("", ex);
                // Preserve the interruption for whoever owns this thread.
                Thread.currentThread().interrupt();
            }
            message = "Total of documents of the index: " + cant;
            OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
            this.notifyTaskProgress(INFORMATION_MESSAGE, message);
            this.notifyLoadedDocument(cant);

            // set path for search
            this.indexPath = defaultFile;

            initLSIManager();
            return true;

        } catch (IOException ex) {
            // CorruptIndexException is an IOException, so a single handler covers both.
            message = "Class: SearchEngineLucene" + " Method: LoadIndex" + "  Error: " + ex.getMessage();
            OutputMonitor.printLine(message, OutputMonitor.ERROR_MESSAGE);
            this.notifyTaskProgress(ERROR_MESSAGE, message);
            throw new IndexException(message);
        }
    }

    /**
     * {@inheritDoc}
     * <p>
     * Opens the index stored at {@code indexPath} just long enough to read its
     * document count, reports that count to the listeners and makes
     * {@code indexPath} the active index path.
     *
     * @return {@code true} when the index was found and read
     * @throws IndexException if {@code indexPath} is {@code null}, is not a
     *         directory holding an index, or the index cannot be read
     */
    @Override
    public boolean loadIndex(File indexPath) throws IndexException {
        String message;
        try {
            if (applyLSI) {
                this.indexLSIPath = new File(this.defaultIndexLSIPath);
            }
            this.reader = null;

            // The null test has to come first; isDirectory() already implies
            // that the path exists.
            if (indexPath == null || !indexPath.isDirectory()
                    || !IndexReader.indexExists(FSDirectory.open(indexPath))) {
                message = "Not found index in this directory: "
                        + (indexPath == null ? null : indexPath.getAbsolutePath());
                throw new IndexException(message);
            }

            this.reader = IndexReader.open(FSDirectory.open(indexPath));
            int cant;
            try {
                cant = this.reader.numDocs();
            } finally {
                // Release the reader even if counting the documents fails.
                this.reader.close();
            }

            message = "Loading Lucene...";
            OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
            this.notifyTaskProgress(INFORMATION_MESSAGE, message);
            try {
                // NOTE(review): fixed two-second pause kept from the original; its purpose is not visible here.
                Thread.sleep(2000);
            } catch (InterruptedException ex) {
                OutputMonitor.printStream("", ex);
                // Preserve the interruption for whoever owns this thread.
                Thread.currentThread().interrupt();
            }

            message = "Total of documents of the index: " + cant;
            OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
            this.notifyTaskProgress(INFORMATION_MESSAGE, message);
            this.notifyLoadedDocument(cant);

            // set path for search
            this.indexPath = indexPath;

            // NOTE(review): unlike loadIndex(), this overload does not call
            // initLSIManager() - confirm whether that is intentional.
            return true;

        } catch (IOException ex) {
            // CorruptIndexException is an IOException, so a single handler covers both.
            message = "Class: SearchEngineLucene" + " Method: LoadIndex" + "  Error: " + ex.getMessage();
            OutputMonitor.printStream(message, ex);
            this.notifyTaskProgress(ERROR_MESSAGE, message);
            throw new IndexException(message);
        }
    }

    /**
     * {@inheritDoc}
     * <p>
     * Prepares {@code indexPath} for the requested operation:
     * <ul>
     * <li>creates the parent directory when it is missing;</li>
     * <li>for {@code MAKE_INDEX} on an existing index, deletes the old index
     * files;</li>
     * <li>for {@code ADD_INDEX} on an existing index, sets the
     * {@code appendIndex} flag so the build opens the index in append
     * mode.</li>
     * </ul>
     * The {@code appendIndex} flag is reset to {@code false} on every call.
     *
     * @return {@code true} when the build may proceed; every refusal is
     *         reported by throwing instead of returning {@code false}
     * @throws IndexException if the parent directory cannot be created, the
     *         operation is unknown for an existing index, {@code ADD_INDEX} is
     *         requested but no index exists, or the index location cannot be
     *         inspected
     */
    @Override
    public boolean safeToBuildIndex(File indexPath, int operation) throws IndexException {
        this.appendIndex = false;
        final String idxpath = indexPath.getPath();
        String message;
        try {
            // getParentFile() is null for a bare relative name such as "index";
            // resolve it against the working directory in that case.
            File dir = indexPath.getParentFile();
            if (dir == null) {
                dir = indexPath.getAbsoluteFile().getParentFile();
            }

            if (dir != null && !dir.exists()) {
                // ensure that the index folder exists
                if (!dir.mkdirs()) {
                    message = "Could not create the index folders at: " + dir.getPath() + ".\n"
                            + "Aborting indexing process.";
                    OutputMonitor.printLine(message, OutputMonitor.ERROR_MESSAGE);
                    this.notifyTaskProgress(ERROR_MESSAGE, message);
                    throw new IndexException(message);
                }
            } else if (IndexReader.indexExists(FSDirectory.open(indexPath))) {

                switch (operation) {
                case MAKE_INDEX:
                    message = "Overwriting index " + idxpath + "\n";
                    OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
                    this.notifyTaskProgress(INFORMATION_MESSAGE, message);
                    deleteFiles(indexPath);
                    break;

                case ADD_INDEX:
                    message = "Appending new files to index " + idxpath + "\n";
                    OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
                    this.notifyTaskProgress(INFORMATION_MESSAGE, message);
                    this.appendIndex = true;
                    break;

                default:
                    message = "Not building index " + idxpath + "\n";
                    OutputMonitor.printLine(message, OutputMonitor.ERROR_MESSAGE);
                    this.notifyTaskProgress(ERROR_MESSAGE, message);
                    throw new IndexException(message);
                }
            } else if (operation == ADD_INDEX) {
                message = "Not exist Lucene index  in this address" + indexPath;
                this.notifyTaskProgress(ERROR_MESSAGE, message);
                throw new IndexException(message);
            }
        } catch (IOException ex) {
            // Do not report "safe" when the location could not be inspected:
            // with appendIndex still false, an ADD_INDEX request would go on to
            // open the index in create mode and overwrite it.
            message = "Could not inspect the index at " + idxpath + ": " + ex.getMessage();
            OutputMonitor.printStream(message, ex);
            this.notifyTaskProgress(ERROR_MESSAGE, message);
            throw new IndexException(message);
        }
        return true;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Maps a field constant to the name of the corresponding case-insensitive
     * index field; unknown constants map to {@code null}.
     */
    @Override
    public String getDocumentField(int field) {
        final String fieldName;

        switch (field) {
        case FIELD_FILEPATH:
            fieldName = "filepath";
            break;
        case FIELD_NAME:
            fieldName = "name";
            break;
        case FIELD_CODE_PACKAGE:
            fieldName = "package";
            break;
        case FIELD_CODE_CLASSES_NAMES:
            fieldName = "classesnames";
            break;
        case FIELD_CODE_METHODS_NAMES:
            fieldName = "methodsnames";
            break;
        case FIELD_CODE_ALL_COMMENTS:
            fieldName = "allcomments";
            break;
        case FIELD_CODE_ALL_SOURCE:
            // the whole content of the source file
            fieldName = "allsource";
            break;
        case FIELD_CODE_VARIABLES_NAMES:
            fieldName = "classesvariables";
            break;
        case FIELD_CODE_JAVADOCS:
            fieldName = "javadocs";
            break;
        case FIELD_DOC_TEXT:
            // the whole content of the text document
            fieldName = "content";
            break;
        case FIELD_DOC_BOOK:
            fieldName = "book";
            break;
        case AUTHOR_DOCUMENTS:
            fieldName = "author";
            break;
        case LAST_MODIFIED_DOCUMENTS:
            fieldName = "lastModified";
            break;
        default:
            fieldName = null;
            break;
        }

        return fieldName;
    }

    /**
     * Maps a field constant to the name of the corresponding case-sensitive
     * index field (the case-insensitive name with a {@code "cs"} suffix).
     *
     * @param field field constant
     * @return the case-sensitive field name, or {@code null} when the constant
     *         has no case-sensitive field
     */
    public String getDocumentFieldCS(int field) {
        final String fieldName;

        switch (field) {
        case FIELD_FILEPATH:
            fieldName = "filepathcs";
            break;
        case FIELD_NAME:
            fieldName = "namecs";
            break;
        case FIELD_CODE_PACKAGE:
            fieldName = "packagecs";
            break;
        case FIELD_CODE_CLASSES_NAMES:
            fieldName = "classesnamescs";
            break;
        case FIELD_CODE_METHODS_NAMES:
            fieldName = "methodsnamescs";
            break;
        case FIELD_CODE_ALL_COMMENTS:
            fieldName = "allcommentscs";
            break;
        case FIELD_CODE_ALL_SOURCE:
            fieldName = "allsourcecs";
            break;
        case FIELD_CODE_VARIABLES_NAMES:
            fieldName = "classesvariablescs";
            break;
        case FIELD_CODE_JAVADOCS:
            fieldName = "javadocscs";
            break;
        case FIELD_DOC_TEXT:
            fieldName = "contentcs";
            break;
        case FIELD_DOC_BOOK:
            fieldName = "bookcs";
            break;
        default:
            fieldName = null;
            break;
        }

        return fieldName;
    }

    /**
     * Builds the index from the collection directory stored in
     * {@code collectionPath}, writing to the location stored in
     * {@code indexPath} (and to the LSI index location when LSI is enabled).
     * <p>
     * Whether the index is created/overwritten or appended to is decided by
     * {@link #safeToBuildIndex(File, int)} through the {@code appendIndex}
     * flag. If anything fails after a writer was opened, that writer is rolled
     * back so its write lock is released and no partial changes are committed.
     *
     * @param operation operation to perform: {@code MAKE_INDEX} or {@code ADD_INDEX}
     * @return the number of files reported as indexed
     * @throws IndexException if the index location is not usable or an I/O
     *         error occurs while writing the index
     */
    private long build(int operation) throws IndexException {
        long indexedFiles = 0;
        String message = "Lucene index will be created at [" + this.indexPath + "]";
        OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
        this.notifyTaskProgress(INFORMATION_MESSAGE, message);

        // Writers opened by this call that have not been closed yet.
        IndexWriter pendingWriter = null;
        IndexWriter pendingWriterLSI = null;

        // start indexing
        try {
            if (safeToBuildIndex(this.indexPath, operation)) {
                setStartTimeOfIndexation(new Date());

                // Appending opens the existing index; otherwise it is created or overwritten.
                final boolean create = !this.appendIndex;
                pendingWriter = new IndexWriter(FSDirectory.open(this.indexPath), this.getFieldAnalyzer(), create,
                        IndexWriter.MaxFieldLength.UNLIMITED);
                this.writer = pendingWriter;
                if (applyLSI) {
                    pendingWriterLSI = new IndexWriter(FSDirectory.open(this.indexLSIPath),
                            new PerFieldAnalyzerWrapper(new StopStemAnalyzer()), create,
                            IndexWriter.MaxFieldLength.UNLIMITED);
                    this.writerLSI = pendingWriterLSI;
                }

                indexedFiles = indexDocs(this.writer, this.writerLSI, this.collectionPath, operation);

                message = "Optimizing...";
                OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
                this.notifyTaskProgress(INFORMATION_MESSAGE, message);
                this.writer.optimize();
                this.writer.close();
                pendingWriter = null;
                if (applyLSI) {
                    this.writerLSI.optimize();
                    this.writerLSI.close();
                    pendingWriterLSI = null;
                }

                setEndTimeOfIndexation(new Date());
                message = "Indexation Time " + this.getIndexationTime() + " milliseconds.";
                OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
                this.notifyTaskProgress(INFORMATION_MESSAGE, message);
            }
        } catch (IOException e) {
            message = " caught a " + e.getClass() + "\n with message: " + e.getMessage() + ".";
            this.notifyTaskProgress(ERROR_MESSAGE, message);
            throw new IndexException(message);
        } finally {
            // Only reached with a non-null entry when the build failed midway:
            // discard the uncommitted work and release the write lock so that a
            // later build can open the index again.
            IndexWriter[] leftovers = { pendingWriter, pendingWriterLSI };
            for (int i = 0; i < leftovers.length; i++) {
                if (leftovers[i] != null) {
                    try {
                        leftovers[i].rollback();
                    } catch (IOException ex) {
                        OutputMonitor.printStream("", ex);
                    }
                }
            }
        }

        initLSIManager(); // initialise the LSI matrix

        return indexedFiles;
    }

    /**
     * When LSI is enabled, starts a new thread that reads the collection
     * information and hands it to the LSI manager. Does nothing otherwise.
     * A failure to read the collection information is only logged.
     */
    private void initLSIManager() {
        if (!applyLSI) {
            return;
        }

        final Runnable lsiLoader = new Runnable() {
            public void run() {
                try {
                    CollectionInfo info = getCollectionInfo();
                    lsiManager.setInitValues(info);
                } catch (IndexException ex) {
                    OutputMonitor.printStream("Reading LSI index.", ex);
                }
            }
        };

        new Thread(lsiLoader).start();
    }

    /**
     * Builds the index from an explicit list of files, writing to the
     * location stored in {@code indexPath} (and to the LSI index location when
     * LSI is enabled).
     * <p>
     * Whether the index is created/overwritten or appended to is decided by
     * {@link #safeToBuildIndex(File, int)} through the {@code appendIndex}
     * flag. If anything fails after a writer was opened, that writer is rolled
     * back so its write lock is released and no partial changes are committed.
     *
     * @param collectionPath list of files that make up the collection
     * @param operation operation to perform: {@code MAKE_INDEX} or {@code ADD_INDEX}
     * @return the number of files indexed
     * @throws IndexException if the index location is not usable or an I/O
     *         error occurs while writing the index
     */
    private long build(List<File> collectionPath, int operation) throws IndexException {
        long indexedFiles = 0;

        String message = "Indexing to directory '" + this.indexPath + "'...";
        OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
        this.notifyTaskProgress(INFORMATION_MESSAGE, message);

        // Writers opened by this call that have not been closed yet.
        IndexWriter pendingWriter = null;
        IndexWriter pendingWriterLSI = null;

        // start indexing
        try {
            setStartTimeOfIndexation(new Date());
            if (safeToBuildIndex(this.indexPath, operation)) {
                this.setFieldAnalyzer(new PerFieldAnalyzerWrapper(new NGramAnalyzer()));
                this.getFieldAnalyzer().addAnalyzer(getDocumentField(FIELD_CODE_ALL_COMMENTS),
                        new StopStemAnalyzer());

                // Appending opens the existing index; otherwise it is created or overwritten.
                final boolean create = !this.appendIndex;
                pendingWriter = new IndexWriter(FSDirectory.open(this.indexPath), this.getFieldAnalyzer(), create,
                        IndexWriter.MaxFieldLength.UNLIMITED);
                this.writer = pendingWriter;
                if (applyLSI) {
                    pendingWriterLSI = new IndexWriter(FSDirectory.open(this.indexLSIPath),
                            new PerFieldAnalyzerWrapper(new StopStemAnalyzer()), create,
                            IndexWriter.MaxFieldLength.UNLIMITED);
                    this.writerLSI = pendingWriterLSI;
                }

                indexedFiles = indexDocs(this.writer, this.writerLSI, collectionPath, operation);

                message = "Optimizing...";
                OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
                this.notifyTaskProgress(INFORMATION_MESSAGE, message);
                this.writer.optimize();
                this.writer.close();
                pendingWriter = null;
                if (applyLSI) {
                    this.writerLSI.optimize();
                    this.writerLSI.close();
                    pendingWriterLSI = null;
                }

                setEndTimeOfIndexation(new Date());
                message = "Indexation Time " + this.getIndexationTime() + " milliseconds.";
                OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
                this.notifyTaskProgress(INFORMATION_MESSAGE, message);
            }
        } catch (IOException e) {
            message = " caught a " + e.getClass() + "\n with message: " + e.getMessage() + ".";
            this.notifyTaskProgress(ERROR_MESSAGE, message);
            throw new IndexException(message);
        } finally {
            // Only reached with a non-null entry when the build failed midway:
            // discard the uncommitted work and release the write lock so that a
            // later build can open the index again.
            IndexWriter[] leftovers = { pendingWriter, pendingWriterLSI };
            for (int i = 0; i < leftovers.length; i++) {
                if (leftovers[i] != null) {
                    try {
                        leftovers[i].rollback();
                    } catch (IOException ex) {
                        OutputMonitor.printStream("", ex);
                    }
                }
            }
        }

        initLSIManager();

        return indexedFiles;
    }

    /**
     * Indexes every supported file of the given list. A file is supported
     * when its name ends with {@code .java}, {@code .pdf}, {@code .txt} or
     * {@code .xml}; any other file only produces an informational message.
     *
     * @param writer writer of the main index
     * @param writerLSI writer of the LSI index
     * @param list files to index
     * @param operation {@code MAKE_INDEX} or {@code ADD_INDEX}, forwarded to {@code indexFile}
     * @return the number of files that were handed to {@code indexFile}
     * @throws IndexException if indexing a file fails
     */
    private int indexDocs(IndexWriter writer, IndexWriter writerLSI, List<File> list, int operation)
            throws IndexException {
        int indexed = 0;

        for (File candidate : list) {
            final String fileName = candidate.getName();
            final boolean supported = fileName.endsWith(".java") || fileName.endsWith(".pdf")
                    || fileName.endsWith(".txt") || fileName.endsWith(".xml");

            final String report;
            if (supported) {
                indexFile(writer, writerLSI, candidate, operation);
                report = "Adding: " + candidate;
                indexed++;
            } else {
                report = "There are files in the collection that are not: .java, pdf, txt o xml documents\n"
                        + "so, they could not be indexed.";
            }

            // Both outcomes are reported through the same two channels.
            OutputMonitor.printLine(report, OutputMonitor.INFORMATION_MESSAGE);
            this.notifyTaskProgress(INFORMATION_MESSAGE, report);
        }

        return indexed;
    }

    /**
     * Indexes the given file, or, when it is a directory, every supported file
     * found beneath it (recursively). A file is supported when its name ends
     * with {@code .java}, {@code .pdf}, {@code .txt} or {@code .xml}; any other
     * file only produces an informational message. Unreadable paths are
     * skipped silently.
     * <p>
     * The counts of nested directories are summed into the result, so callers
     * can add the returned value to their running document counter; this
     * method does not touch that counter itself.
     *
     * @param writer writer of the main index
     * @param writerLSI writer of the LSI index
     * @param file file or directory to index
     * @param operation {@code MAKE_INDEX} or {@code ADD_INDEX}, forwarded to {@code indexFile}
     * @return the number of files that were handed to {@code indexFile}
     * @throws IndexException if indexing a file fails
     */
    private int indexDocs(IndexWriter writer, IndexWriter writerLSI, File file, int operation)
            throws IndexException {
        int docCount = 0;
        String message;
        if (file.canRead()) {
            if (file.isDirectory()) {
                // File.list() returns null on an I/O error, so test before use.
                String[] files = file.list();
                if (files != null) {
                    for (int i = 0; i < files.length; i++) {
                        // Accumulate the nested counts instead of discarding them.
                        docCount += indexDocs(writer, writerLSI, new File(file, files[i]), operation);
                    }
                }
            } else if (file.getName().endsWith(".java") || file.getName().endsWith(".pdf")
                    || file.getName().endsWith(".txt") || file.getName().endsWith(".xml")) {
                indexFile(writer, writerLSI, file, operation);
                message = "Adding: " + file;
                docCount++;
                OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
                this.notifyTaskProgress(INFORMATION_MESSAGE, message);

            } else {
                message = "There are files in the collection that are not: .java, pdf, txt or xml documents" + "\n"
                        + "so, they could not be indexed.";
                OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
                this.notifyTaskProgress(INFORMATION_MESSAGE, message);

            }
        }
        return docCount;
    }

    /**
     * Indexes a single file field by field. Two documents are written to the
     * main index for every file: one with the case-insensitive field names and
     * analyzer, and one with the case-sensitive ones. When LSI is enabled, a
     * path document and a content document are also written to the LSI index.
     * <p>
     * Hidden, missing or unreadable files are skipped, as are files whose path
     * does not end with {@code .pdf}, {@code .java}, {@code .txt} or
     * {@code .xml}. An {@link IOException} while indexing is logged and
     * reported to the listeners, not rethrown.
     *
     * @param writer writer of the main index
     * @param writerLSI writer of the LSI index; only used when LSI is enabled
     * @param f file to index
     * @param operation {@code ADD_INDEX} or {@code MAKE_INDEX}; selects which
     *        listener notification is sent after the file was added
     * @throws IndexException declared for callers; not raised directly by the
     *         visible code of this method
     */
    private void indexFile(IndexWriter writer, IndexWriter writerLSI, File f, int operation) throws IndexException {

        if (f.isHidden() || !f.exists() || !f.canRead()) {
            return;
        }

        DocumentLucene doc = null; // case-insensitive document
        DocumentLucene doccs = null; // case-sensitive document
        DocumentLucene contentDocLSI = null; // LSI document holding the content
        DocumentLucene pathDocLSI = null; // LSI document holding path and name
        try {

            // Fresh analyzers per file: the branches below register
            // type-specific analyzers on them.
            setFieldAnalyzer(new PerFieldAnalyzerWrapper(new NGramAnalyzer()));
            setFieldAnalyzerCS(new PerFieldAnalyzerWrapper(new NGramAnalyzerCaseSensitive()));

            if (f.getPath().endsWith(".pdf")) {

                doc = new DocumentLucene();
                doccs = new DocumentLucene();
                if (applyLSI) {
                    contentDocLSI = new DocumentLucene();
                    pathDocLSI = new DocumentLucene();
                }
                PdfParser pdfp = new PdfParser();
                try {
                    pdfp.divideTextforLucene(f, doccs, doc, contentDocLSI);
                } catch (Exception e) {
                    // Best effort: the file is still indexed with its metadata,
                    // but the failure is no longer silent.
                    OutputMonitor.printStream("Could not extract the text of " + f, e);
                }
                pdfp.analyzePdfDocument(f);
                doc.addField(getDocumentField(AUTHOR_DOCUMENTS), pdfp.getAuthor());
                doccs.addField(getDocumentField(AUTHOR_DOCUMENTS), pdfp.getAuthor());
                try {
                    String date = DateFormat.getDateInstance().format(pdfp.getCalModification().getTime());
                    doc.addField(getDocumentField(LAST_MODIFIED_DOCUMENTS), date);
                    doccs.addField(getDocumentField(LAST_MODIFIED_DOCUMENTS), date);
                } catch (Exception ex) {
                    // No usable modification date: store an empty value instead.
                    doc.addField(getDocumentField(LAST_MODIFIED_DOCUMENTS), "");
                    doccs.addField(getDocumentField(LAST_MODIFIED_DOCUMENTS), "");
                }
            } else if (f.getPath().endsWith(".java")) {
                getFieldAnalyzer().addAnalyzer(getDocumentField(FIELD_CODE_ALL_COMMENTS), new StopStemAnalyzer());
                getFieldAnalyzerCS().addAnalyzer(getDocumentFieldCS(FIELD_CODE_ALL_COMMENTS),
                        new StopStemAnalyzerCaseSensitive());
                ArrayList<String> comment = new ArrayList<String>();
                ArrayList<String> javadocs = new ArrayList<String>();
                JavaParser jp = new JavaParser();
                jp.AnalyzeDocument(f);
                doc = new DocumentLucene();
                doccs = new DocumentLucene();
                doc.addField(getDocumentField(FIELD_FILEPATH), f.getCanonicalPath());
                doccs.addField(getDocumentFieldCS(FIELD_FILEPATH), f.getCanonicalPath());

                doc.addField(getDocumentField(FIELD_NAME), f.getName());
                doccs.addField(getDocumentFieldCS(FIELD_NAME), f.getName());

                doc.addField(getDocumentField(FIELD_CODE_ALL_SOURCE), jp.getAllSource());
                doccs.addField(getDocumentFieldCS(FIELD_CODE_ALL_SOURCE), jp.getAllSource());

                if (jp.getClassPackage() != null) {
                    doc.addField(getDocumentField(FIELD_CODE_PACKAGE), jp.getClassPackage());
                    doccs.addField(getDocumentFieldCS(FIELD_CODE_PACKAGE), jp.getClassPackage());
                }

                for (int i = 0; i < jp.getClassNumber(); i++) {
                    if (jp.getClassesComments(i) != null) {
                        comment.add(jp.getClassesComments(i));
                    }
                    if (jp.getClassesJDocs(i) != null) {
                        javadocs.add(jp.getClassesJDocs(i));
                    }

                    doc.addField(getDocumentField(FIELD_CODE_CLASSES_NAMES), jp.getClassesNames(i));
                    doccs.addField(getDocumentFieldCS(FIELD_CODE_CLASSES_NAMES), jp.getClassesNames(i));

                    for (int l = 0; l < jp.getClassVariableNumber(); l++) {
                        doc.addField(getDocumentField(FIELD_CODE_VARIABLES_NAMES), jp.getClassesVarName(i, l));
                        doccs.addField(getDocumentFieldCS(FIELD_CODE_VARIABLES_NAMES), jp.getClassesVarName(i, l));
                        if (jp.getClassesCommentVariables(i, l) != null) {
                            comment.add(jp.getClassesCommentVariables(i, l));
                        }
                        if (jp.getVariablesJDocs(i, l) != null) {
                            javadocs.add(jp.getVariablesJDocs(i, l));
                        }

                    }

                    for (int j = 0; j < jp.getClassesMethods(i); j++) {
                        if (jp.getClassesMethodComment(i, j) != null) {
                            comment.add(jp.getClassesMethodComment(i, j));

                        }
                        if (jp.getClassesMethodJDocs(i, j) != null) {
                            javadocs.add(jp.getClassesMethodJDocs(i, j));
                        }

                        doc.addField(getDocumentField(FIELD_CODE_METHODS_NAMES), jp.getClassesMethodsName(i, j));
                        doccs.addField(getDocumentFieldCS(FIELD_CODE_METHODS_NAMES),
                                jp.getClassesMethodsName(i, j));

                    }
                }
                // join all collected comments into a single field value
                String allComments = joinData(comment);
                doc.addField(getDocumentField(FIELD_CODE_ALL_COMMENTS), allComments);
                doccs.addField(getDocumentFieldCS(FIELD_CODE_ALL_COMMENTS), allComments);

                // join all collected javadocs into a single field value
                String allJavadocs = joinData(javadocs);
                doc.addField(getDocumentField(FIELD_CODE_JAVADOCS), allJavadocs);
                doccs.addField(getDocumentFieldCS(FIELD_CODE_JAVADOCS), allJavadocs);

                if (applyLSI) {
                    contentDocLSI = new DocumentLucene();
                    contentDocLSI.addField(getDocumentField(FIELD_CODE_ALL_SOURCE), jp.getAllSource());

                    pathDocLSI = new DocumentLucene();
                    pathDocLSI.addField(getDocumentField(FIELD_FILEPATH), f.getCanonicalPath());
                    pathDocLSI.addField(getDocumentField(FIELD_NAME), f.getName());
                }

            } else if (f.getPath().endsWith(".txt")) {
                doc = new DocumentLucene();
                doccs = new DocumentLucene();
                doc.addField(getDocumentField(FIELD_FILEPATH), f.getCanonicalPath());
                doccs.addField(getDocumentFieldCS(FIELD_FILEPATH), f.getCanonicalPath());
                doc.addField(getDocumentField(FIELD_NAME), f.getName());
                doccs.addField(getDocumentFieldCS(FIELD_NAME), f.getName());

                String textFile = Utilities.readFile(f);
                doc.addField(getDocumentField(FIELD_DOC_TEXT), textFile);
                doccs.addField(getDocumentFieldCS(FIELD_DOC_TEXT), textFile);

                if (applyLSI) {
                    contentDocLSI = new DocumentLucene();
                    contentDocLSI.addField(getDocumentField(FIELD_DOC_TEXT), textFile);
                    pathDocLSI = new DocumentLucene();
                    pathDocLSI.addField(getDocumentField(FIELD_FILEPATH), f.getCanonicalPath());
                    pathDocLSI.addField(getDocumentField(FIELD_NAME), f.getName());
                }

            } else if (f.getPath().endsWith(".xml")) {

                getFieldAnalyzer().addAnalyzer(getDocumentField(FIELD_DOC_TEXT), new WikiAnalyzer());
                getFieldAnalyzerCS().addAnalyzer(getDocumentFieldCS(FIELD_DOC_TEXT),
                        new WikiCaseSensitiveAnalyzer());

                doc = new DocumentLucene();
                doccs = new DocumentLucene();
                doc.addField(getDocumentField(FIELD_FILEPATH), f.getCanonicalPath());
                doccs.addField(getDocumentFieldCS(FIELD_FILEPATH), f.getCanonicalPath());
                doc.addField(getDocumentField(FIELD_NAME), f.getName());
                doccs.addField(getDocumentFieldCS(FIELD_NAME), f.getName());

                String textFile = Utilities.readFile(f);
                doc.addField(getDocumentField(FIELD_DOC_TEXT), textFile);
                doccs.addField(getDocumentFieldCS(FIELD_DOC_TEXT), textFile);

                if (applyLSI) {
                    contentDocLSI = new DocumentLucene();
                    contentDocLSI.addField(getDocumentField(FIELD_DOC_TEXT), textFile);
                    pathDocLSI = new DocumentLucene();
                    pathDocLSI.addField(getDocumentField(FIELD_FILEPATH), f.getCanonicalPath());
                    pathDocLSI.addField(getDocumentField(FIELD_NAME), f.getName());
                }

            }

            if (doc == null || doccs == null) {
                // None of the supported extensions matched, so there is nothing to add.
                OutputMonitor.printLine("Unsupported file type, skipping: " + f, OutputMonitor.INFORMATION_MESSAGE);
                return;
            }

            writer.addDocument(doc.getDoc(), getFieldAnalyzer());
            writer.addDocument(doccs.getDoc(), getFieldAnalyzerCS());

            if (applyLSI) {
                // The LSI documents are analysed with the stop/stem analyzer.
                setFieldAnalyzer(new PerFieldAnalyzerWrapper(new StopStemAnalyzer()));
                writerLSI.addDocument(pathDocLSI.getDoc(), getFieldAnalyzer());
                writerLSI.addDocument(contentDocLSI.getDoc(), getFieldAnalyzer());
            }

            if (operation == ADD_INDEX) {
                this.notifyAddedDocument();
            } else if (operation == MAKE_INDEX) {
                this.notifyIndexedDocument();
            }

        } catch (IOException ex) {
            OutputMonitor.printStream("", ex);
            this.notifyTaskProgress(ERROR_MESSAGE, ex.getMessage());
        }
    }

    /**
     * Joins the given strings (for example the comments of the variables,
     * methods and class of a source file, or its javadocs) into one string.
     * <p>
     * The result always starts with a single space; every non-null element is
     * appended preceded by one more space, and {@code null} elements are
     * skipped. An empty list therefore yields {@code " "}.
     *
     * @param aa strings to join; elements may be {@code null}
     * @return the joined text, never {@code null}
     */
    private String joinData(ArrayList<String> aa) {
        StringBuilder joined = new StringBuilder(" ");

        for (String piece : aa) {
            if (piece == null) {
                continue;
            }
            joined.append(' ').append(piece);
        }

        return joined.toString();
    }

    /**
     * Builds the list of {@link DocumentMetaData} describing the hits of a
     * search.
     * <p>
     * For every hit the stored document is loaded through {@code searcher};
     * its path, name, author and last-modified values are read, and a summary
     * is produced with {@code getHighlighter} and passed through
     * {@code filterTags} for the {@code <B>} and {@code </B>} marks. Hits whose
     * stored document cannot be loaded are reported through
     * {@code notifyTaskProgress} and left out of the result.
     * <p>
     * Side effects: the {@code scoreDocObj}, {@code docum} and
     * {@code fieldAnalyzer} fields of this object are overwritten.
     *
     * @param sd hits returned by the search
     * @param caseS {@code true} to read path and name through the
     * case-sensitive field names and to highlight with the case-sensitive
     * analyzers
     * @param q query handed to the highlighter to choose the summary fragments
     * @return one entry per hit whose stored document could be loaded, in the
     * order of {@code sd}
     */
    private ArrayList<DocumentMetaData> saveResults(ScoreDoc[] sd, boolean caseS, Query q) {
        ArrayList<DocumentMetaData> docsfound = new ArrayList<DocumentMetaData>();
        this.setFieldAnalyzer(null);

        for (int k = 0; k < sd.length; k++) {
            this.scoreDocObj = sd[k];
            // scoreDocObj.doc is the number that identifies the document among the search results
            int iddoc = this.scoreDocObj.doc;
            // Convert through the decimal text of the float so the double carries the same digits.
            double ss = Double.valueOf(String.valueOf(this.scoreDocObj.score));

            this.docum = null;
            try {
                this.docum = this.searcher.doc(iddoc);
            } catch (CorruptIndexException ex) {
                OutputMonitor.printStream("", ex);
                this.notifyTaskProgress(ERROR_MESSAGE, ex.getMessage());
            } catch (IOException ex) {
                OutputMonitor.printStream("", ex);
                this.notifyTaskProgress(ERROR_MESSAGE, ex.getMessage());
            }
            if (this.docum == null) {
                // The failure was already reported; skip the hit instead of
                // dereferencing a null document below.
                continue;
            }

            String filepath;
            String name;
            if (!caseS) {
                filepath = this.docum.get(getDocumentField(FIELD_FILEPATH));
                name = this.docum.get(getDocumentField(FIELD_NAME));
                this.setFieldAnalyzer(new PerFieldAnalyzerWrapper(new NGramAnalyzer()));
                this.getFieldAnalyzer().addAnalyzer(getDocumentField(FIELD_CODE_ALL_COMMENTS),
                        new StopStemAnalyzer());
            } else {
                filepath = this.docum.get(getDocumentFieldCS(FIELD_FILEPATH));
                name = this.docum.get(getDocumentFieldCS(FIELD_NAME));
                this.setFieldAnalyzer(new PerFieldAnalyzerWrapper(new NGramAnalyzerCaseSensitive()));
                this.getFieldAnalyzer().addAnalyzer(getDocumentFieldCS(FIELD_CODE_ALL_COMMENTS),
                        new StopStemAnalyzerCaseSensitive());
            }
            // NOTE(review): author, last-modified and the text fields below are read
            // through getDocumentField(...) in both modes, as before - confirm this is
            // intended for the case-sensitive index.
            String author = this.docum.get(getDocumentField(AUTHOR_DOCUMENTS));
            String lastModified = this.docum.get(getDocumentField(LAST_MODIFIED_DOCUMENTS));

            String filetype = getFileExtension(filepath);
            long size = new File(filepath).length();

            // Declared per hit so that a file type without a text field does not
            // inherit the text (and therefore the summary) of the previous hit.
            String field = null;
            String textfield = null;
            if (filetype.equalsIgnoreCase("java")) {
                field = getDocumentField(FIELD_CODE_ALL_SOURCE);
            } else if (filetype.equalsIgnoreCase("pdf")) {
                field = getDocumentField(FIELD_DOC_BOOK);
            } else if (filetype.equalsIgnoreCase("txt")) {
                field = getDocumentField(FIELD_DOC_TEXT);
            }
            if (field != null) {
                textfield = this.docum.get(field);
            }

            String summary;
            if (textfield != null) {
                String temp = getHighlighter(q, this.getFieldAnalyzer(), textfield, field);
                summary = filterTags(filterTags(temp, "<B>"), "</B>");
            } else {
                summary = " ";
            }

            DocumentMetaData metaDoc = new DocumentMetaData();
            metaDoc.setName(name);
            metaDoc.setPath(filepath);
            metaDoc.setIndex(iddoc);
            metaDoc.setAuthor(author);
            metaDoc.setLastModified(lastModified);
            metaDoc.setSynthesis(summary);
            metaDoc.setSize(size);
            metaDoc.setType(filetype);
            metaDoc.setScore(ss);
            metaDoc.setSearcher(KeySearchable.LUCENE_SEARCH_ENGINE);
            docsfound.add(metaDoc);
        }
        return docsfound;
    }

    /**
     * Produces the summary of a document text for a query.
     * <p>
     * A {@link Highlighter} scored with {@link QueryTermScorer} is stored in
     * {@code hg}, configured with a {@link SimpleFragmenter} of size 20 and a
     * limit of 600 analyzed characters, and asked for up to 20 best fragments
     * joined by {@code "..."}. The token stream used is stored in
     * {@code tokens}. Failures are printed through {@code OutputMonitor} and
     * are not propagated.
     *
     * @param q query whose terms score the fragments
     * @param a analyzer used to tokenize {@code text}
     * @param text text to summarize
     * @param field name of the field {@code text} belongs to
     * @return the selected fragments, or {@code " "} when none were obtained
     */
    private String getHighlighter(Query q, Analyzer a, String text, String field) {
        Highlighter highlighter = new Highlighter(new QueryTermScorer(q));
        this.hg = highlighter;
        highlighter.setTextFragmenter(new SimpleFragmenter(20));
        highlighter.setMaxDocCharsToAnalyze(600);

        String fragments = null;
        try {
            this.tokens = TokenSources.getTokenStream(field, text, a);
            fragments = highlighter.getBestFragments(this.tokens, text, 20, "...");
        } catch (IOException ex) {
            OutputMonitor.printStream("IO", ex);
        } catch (InvalidTokenOffsetsException ex) {
            OutputMonitor.printStream("", ex);
        }

        return (fragments != null) ? fragments : " ";
    }

    /**
     * Removes every occurrence of a tag that the highlighter left in a
     * summary.
     * <p>
     * The mark is matched literally, not as a regular expression. When the
     * text does not contain the mark it is returned unchanged.
     *
     * @param text text to clean; {@code null} is treated as empty
     * @param mark tag to remove, for example {@code "<B>"}; when {@code null}
     * or empty nothing is removed
     * @return {@code text} without any occurrence of {@code mark}, never
     * {@code null}
     */
    public String filterTags(String text, String mark) {
        if (text == null) {
            return "";
        }
        if (mark == null || mark.isEmpty()) {
            return text;
        }
        // String.replace works on literal sequences, so marks containing regex
        // metacharacters are handled correctly and untagged text is kept as is.
        return text.replace(mark, "");
    }

    /**
     * Stores the given directory as the default LSI index path and returns
     * the collection information read by {@link #getCollectionInfo()}.
     *
     * @param indexDirectory path of the directory that holds the index
     * @return the collection information of that index
     * @throws IndexException if {@link #getCollectionInfo()} fails
     */
    public CollectionInfo getCollectionInfo(String indexDirectory) throws IndexException {
        this.defaultIndexLSIPath = indexDirectory;
        final CollectionInfo info = this.getCollectionInfo();
        return info;
    }

    /**
     * Reads the term/document occurrence relation from the index stored in
     * {@code defaultIndexLSIPath}.
     * <p>
     * The index is read as consecutive pairs of documents: the even-numbered
     * document supplies the {@code "name"} and {@code "filepath"} stored
     * fields, and only postings on the following odd-numbered document are
     * counted. Each odd document number is mapped to the position of its pair
     * in the returned document list.
     * <p>
     * Side effects: the {@code indexLSIPath}, {@code directory} and
     * {@code reader} fields are overwritten; the reader is left open.
     *
     * @return the terms of the collection with, for each term, the documents
     * that contain it and the term frequency in each
     * @throws IndexException if the directory holds no index or an I/O error
     * occurs while reading it
     */
    public CollectionInfo getCollectionInfo() throws IndexException {
        try {
            this.indexLSIPath = new File(defaultIndexLSIPath);
            this.directory = FSDirectory.open(this.indexLSIPath);
            // Make sure an index exists in the given directory.
            if (!IndexReader.indexExists(this.directory)) {
                throw new IndexException("Index invalid. Not exist index in the directory: " + defaultIndexLSIPath);
            }
            this.reader = IndexReader.open(this.directory);

            // NOTE(review): document numbers are assumed to run from 0 to numDocs()-1,
            // i.e. an index without deletions - confirm against how the index is written.
            int docsCount = this.reader.numDocs();
            Map<Integer, Integer> docsMap = new HashMap<Integer, Integer>();
            List<DocInfo> docInfoList = new ArrayList<DocInfo>(docsCount);
            int index = 0;
            for (int i = 0; i < docsCount; i += 2) {
                Document doc = this.reader.document(i);
                docInfoList.add(new DocInfo(doc.get("name"), doc.get("filepath")));
                // The odd document that follows is looked up by number in the term loop.
                docsMap.put(i + 1, index);
                index++;
            }

            // NOTE(review): the keys added above are odd and never exceed docsCount, so
            // this removal does not appear to match any key; kept as in the original.
            docsMap.remove(docsCount + 1);

            Map<TermInfo, List<DocTermInfo>> termsMap = new HashMap<TermInfo, List<DocTermInfo>>();
            List<String> termsList = new ArrayList<String>();
            int termsCount = 0;

            // All the terms of the collection index.
            TermEnum terms = this.reader.terms();
            try {
                while (terms.next()) {
                    Term termItem = terms.term();
                    List<DocTermInfo> list = new ArrayList<DocTermInfo>();
                    TermDocs docs = this.reader.termDocs(termItem);
                    try {
                        while (docs.next()) {
                            int docNum = docs.doc();
                            // Only postings on odd-numbered documents are counted.
                            if (docNum % 2 != 0) {
                                list.add(new DocTermInfo(docsMap.get(docNum), docs.freq()));
                            }
                        }
                    } finally {
                        // Release the postings cursor of this term.
                        docs.close();
                    }

                    if (!list.isEmpty()) {
                        termsMap.put(new TermInfo(termsCount, termItem.text(), reader.docFreq(termItem)), list);
                        termsList.add(termItem.text());
                        termsCount++;
                    }
                }
            } finally {
                terms.close();
            }
            return new CollectionInfo(termsMap, "Apache Lucene", termsList, docInfoList, singularValue);

        } catch (IOException ex) {
            throw new IndexException(ex.getMessage());
        }
    }

    /**
     * Returns the per-field analyzer wrapper currently held in
     * {@code fieldAnalyzer}.
     *
     * @return the current wrapper; may be {@code null} if none has been set
     */
    public PerFieldAnalyzerWrapper getFieldAnalyzer() {
        return this.fieldAnalyzer;
    }

    /**
     * Replaces the per-field analyzer wrapper held in {@code fieldAnalyzer}.
     *
     * @param fieldAnalyzer the wrapper to store; {@code null} is stored as is
     */
    public void setFieldAnalyzer(final PerFieldAnalyzerWrapper fieldAnalyzer) {
        this.fieldAnalyzer = fieldAnalyzer;
    }

    /**
     * Returns the per-field analyzer wrapper currently held in
     * {@code fieldAnalyzerCS}.
     *
     * @return the current wrapper; may be {@code null} if none has been set
     */
    public PerFieldAnalyzerWrapper getFieldAnalyzerCS() {
        return this.fieldAnalyzerCS;
    }

    /**
     * Replaces the per-field analyzer wrapper held in {@code fieldAnalyzerCS}.
     *
     * @param fieldAnalyzerCS the wrapper to store; {@code null} is stored as is
     */
    public void setFieldAnalyzerCS(final PerFieldAnalyzerWrapper fieldAnalyzerCS) {
        this.fieldAnalyzerCS = fieldAnalyzerCS;
    }
}