com.netcrest.pado.index.provider.lucene.TopNLuceneSearch.java Source code

Java tutorial

Introduction

Here is the source code for com.netcrest.pado.index.provider.lucene.TopNLuceneSearch.java

Source

/*
 * Copyright (c) 2013-2015 Netcrest Technologies, LLC. All rights reserved.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.netcrest.pado.index.provider.lucene;

import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

import com.netcrest.pado.data.jsonlite.JsonLite;
import com.netcrest.pado.exception.PadoException;
import com.netcrest.pado.index.internal.Constants;
import com.netcrest.pado.index.result.IndexableResult;
import com.netcrest.pado.index.result.ResultItem;
import com.netcrest.pado.index.result.ValueInfo;
import com.netcrest.pado.index.service.GridQuery;
import com.netcrest.pado.pql.PqlParser;
import com.netcrest.pado.temporal.ITemporalData;
import com.netcrest.pado.temporal.ITemporalKey;
import com.netcrest.pado.temporal.TemporalEntry;
import com.netcrest.pado.temporal.gemfire.impl.GemfireTemporalData;

/**
 * Lucene search variant that collects identity/temporal keys matching a query
 * and orders text-search results by descending score (see
 * {@link #combineAndSort(List, GridQuery, boolean)}).
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public class TopNLuceneSearch extends LuceneSearch {

    public static final Version LUCENE_VERSION = Version.LUCENE_47;

    /**
     * Registry of all created TopNLuceneSearch objects, keyed by full path:
     * &lt;full-path, TopNLuceneSearch&gt;. Guarded by the class lock taken in
     * {@link #getTopLuceneSearch(String)}.
     */
    private final static Map<String, TopNLuceneSearch> luceneSearchMap = new HashMap<String, TopNLuceneSearch>();

    private static final DateTool.Resolution TIME_RESOLUTION = DateTool.Resolution.DAY;

    public static final String MIN_DATE = "19700101";
    public static final String MAX_DATE = "20991231";

    /**
     * {@link #MIN_DATE} converted by DateTool.stringToTime at DAY resolution.
     * Stays 0 if that conversion fails.
     */
    public static long MIN_TIME;

    /**
     * {@link #MAX_DATE} converted by DateTool.stringToTime at DAY resolution.
     * Stays 0 if that conversion fails.
     */
    public static long MAX_TIME;

    static {
        try {
            MIN_TIME = DateTool.stringToTime(MIN_DATE, TIME_RESOLUTION);
            MAX_TIME = DateTool.stringToTime(MAX_DATE, TIME_RESOLUTION);
        } catch (ParseException ignored) {
            // Deliberately ignored: both inputs are the fixed literals above,
            // so a failure here is not expected. On failure the affected
            // field(s) keep the default value 0.
        }
    }

    /**
     * Time query predicate. The start-time ranges use Lucene's inclusive
     * brackets on both ends; the end-time ranges exclude their lower bound
     * (<code>{</code>) and include their upper bound. Takes four
     * <code>%s</code> arguments.
     */
    public final static String TIME_QUERY_PREDICATE = "StartValidTime:[" + MIN_DATE
            + " TO %s] AND EndValidTime:{%s TO " + MAX_DATE + "] AND StartWrittenTime:[" + MIN_DATE
            + " TO %s] AND EndWrittenTime:{%s TO " + MAX_DATE + "]";

    /**
     * Predicate restricting the valid-time window and a caller-supplied
     * written-time range. All ranges are inclusive. Takes four
     * <code>%s</code> arguments.
     */
    public final static String START_WRITTEN_TIME_RANGE_QUERY_PREDICATE = "StartValidTime:[" + MIN_DATE
            + " TO %s] AND EndValidTime:[%s TO " + MAX_DATE + "] AND StartWrittenTime:[" + "%s" + " TO %s]";

    /**
     * Instances are obtained through {@link #getTopLuceneSearch(String)}.
     */
    private TopNLuceneSearch() {
    }

    /**
     * Returns the TopNLuceneSearch registered for the specified full path,
     * creating and registering a new instance if none exists yet.
     * <p>
     * The method is synchronized so that concurrent callers asking for the
     * same path always receive the same instance and the backing HashMap is
     * never mutated concurrently.
     * 
     * @param fullPath
     *            Full path
     * @return the instance associated with <code>fullPath</code>, never null
     */
    public static synchronized TopNLuceneSearch getTopLuceneSearch(String fullPath) {
        TopNLuceneSearch search = luceneSearchMap.get(fullPath);
        if (search == null) {
            search = new TopNLuceneSearch();
            luceneSearchMap.put(fullPath, search);
        }
        return search;
    }

    /**
     * Runs the query against the index in <code>dir</code> and returns the
     * identity keys of all matching documents that yield a temporal key.
     * The identity key is taken from the document's temporal key.
     * 
     * @param queryString
     *            Lucene query string; '-' characters are escaped before
     *            parsing
     * @param dir
     *            Lucene directory to search
     * @return set of identity keys, possibly empty
     * @throws PadoException
     *             if the query cannot be parsed (carries only the message of
     *             the underlying exception)
     * @throws RuntimeException
     *             wrapping the IOException if the index cannot be opened or
     *             read
     */
    protected Set<Object> getIdentityKeySet(String queryString, Directory dir) {
        Set<Object> identityKeySet = new HashSet<Object>();
        DirectoryReader reader = openReaderOrFail(dir);
        try {
            Query query;
            try {
                query = parseEscapedQuery(queryString);
            } catch (Exception ex) {
                // Lucene bug. Unable to serialize exception. Log the message
                // and throw a new exception with the string message.
                ex.printStackTrace();
                throw new PadoException(ex.getMessage());
            }
            for (ITemporalKey temporalKey : collectTemporalKeys(reader, query)) {
                identityKeySet.add(temporalKey.getIdentityKey());
            }
        } finally {
            // The reader was previously never closed, leaking index resources
            // on every call.
            closeReaderQuietly(reader);
        }
        return identityKeySet;
    }

    /**
     * Runs the query against the index in <code>dir</code> and returns the
     * temporal keys of all matching documents that yield one.
     * 
     * @param queryString
     *            Lucene query string; '-' characters are escaped before
     *            parsing
     * @param dir
     *            Lucene directory to search
     * @return set of temporal keys, possibly empty
     * @throws RuntimeException
     *             if the query cannot be parsed (carries only the message of
     *             the underlying exception), or wrapping the IOException if
     *             the index cannot be opened or read
     */
    protected Set<ITemporalKey> getTemporalKeySet(String queryString, Directory dir) {
        Set<ITemporalKey> temporalKeySet = new HashSet<ITemporalKey>();
        DirectoryReader reader = openReaderOrFail(dir);
        try {
            Query query;
            try {
                query = parseEscapedQuery(queryString);
            } catch (Exception ex) {
                // Lucene 4.7 bug, internal message not serializable
                // Send message instead of nesting the cause.
                throw new RuntimeException(ex.getMessage());
            }
            temporalKeySet.addAll(collectTemporalKeys(reader, query));
        } finally {
            closeReaderQuietly(reader);
        }
        return temporalKeySet;
    }

    /**
     * Opens a reader on the directory, converting any I/O failure (including
     * CorruptIndexException, which is an IOException) into a RuntimeException.
     */
    private static DirectoryReader openReaderOrFail(Directory dir) {
        try {
            return DirectoryReader.open(dir);
        } catch (IOException e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
    }

    /**
     * Parses the query string with a StandardQueryParser against the default
     * field "__doc" after escaping every '-' character. Declared with a broad
     * throws clause because the callers intentionally catch every failure
     * (including a null query string) and rethrow with the message only.
     */
    private static Query parseEscapedQuery(String queryString) throws Exception {
        StandardQueryParser parser = new StandardQueryParser(new StandardAnalyzer(LUCENE_VERSION));
        return parser.parse(queryString.replaceAll("\\-", "\\\\-"), "__doc");
    }

    /**
     * Executes the query and returns the non-null temporal keys of the hits,
     * in hit order. I/O failures are rethrown as RuntimeException.
     */
    private static List<ITemporalKey> collectTemporalKeys(DirectoryReader reader, Query query) {
        List<ITemporalKey> temporalKeys = new ArrayList<ITemporalKey>();
        IndexSearcher searcher = new IndexSearcher(reader);
        try {
            TopDocs results = searcher.search(query, null, Integer.MAX_VALUE);
            for (ScoreDoc hit : results.scoreDocs) {
                Document doc = searcher.doc(hit.doc);
                LuceneField luceneField = new LuceneField();
                ITemporalKey temporalKey = luceneField.getTemporalKey(doc);
                if (temporalKey != null) {
                    temporalKeys.add(temporalKey);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
        return temporalKeys;
    }

    /**
     * Closes the reader; a failure to close is reported but never masks the
     * result or exception of the search itself.
     */
    private static void closeReaderQuietly(DirectoryReader reader) {
        try {
            reader.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Orders the entities by descending text-search score.
     * <p>
     * When <code>isMember</code> is true each entity is expected to be a
     * TemporalEntry; its score is computed with
     * {@link #getLSScore(String, String)} from the search phrase and the
     * target field value, and stored in the entity's JsonLite under
     * {@link Constants#TEXT_SEARCH_SCORE}. When <code>isMember</code> is false
     * each entity is expected to be an IndexableResult (wrapping either a
     * TemporalEntry or a ResultItem/ValueInfo around one) and the score
     * already stored in the JsonLite is used.
     * <p>
     * Entities with equal scores keep their relative input order. Entities
     * for which no score can be determined (unexpected wrapper or data type,
     * missing or non-numeric score) are not dropped; they are appended after
     * all scored entities in their input order.
     * 
     * @param entities
     *            entities to sort; the list is cleared and refilled with the
     *            sorted content
     * @param criteria
     *            query supplying the text-search phrase and target field
     * @param isMember
     *            true to compute scores, false to reuse stored scores
     * @return a new list holding the entities in sorted order
     */
    @Override
    public List combineAndSort(List entities, GridQuery criteria, boolean isMember) {
        String toCompare = PqlParser.getTextSearchPHRASE(criteria);
        String targetField = PqlParser.getTextSearchTargetField(criteria);
        if (entities == null) {
            return new ArrayList<Object>();
        }

        final Object[] snapshot = entities.toArray();
        final double[] scores = new double[snapshot.length];
        List<Integer> ranked = new ArrayList<Integer>(snapshot.length);
        List<Object> unscored = new ArrayList<Object>();

        for (int i = 0; i < snapshot.length; i++) {
            Double score = resolveScore(snapshot[i], isMember, toCompare, targetField);
            if (score == null) {
                unscored.add(snapshot[i]);
            } else {
                scores[i] = score.doubleValue();
                ranked.add(Integer.valueOf(i));
            }
        }

        // Sort positions rather than the payloads so that the JsonLite
        // objects do not need a temporary index entry. Collections.sort is
        // stable, so ties keep their input order.
        Collections.sort(ranked, new Comparator<Integer>() {
            @Override
            public int compare(Integer left, Integer right) {
                return Double.compare(scores[right.intValue()], scores[left.intValue()]);
            }
        });

        List<Object> finalResult = new ArrayList<Object>(snapshot.length);
        for (Integer position : ranked) {
            finalResult.add(snapshot[position.intValue()]);
        }
        finalResult.addAll(unscored);

        entities.clear();
        entities.addAll(finalResult);

        return finalResult;
    }

    /**
     * Determines the score of one entity, or returns null if the entity does
     * not carry a JsonLite payload or (non-member case) a numeric, non-NaN
     * stored score. In the member case the computed score is also written
     * into the JsonLite.
     */
    private static Double resolveScore(Object entity, boolean isMember, String toCompare, String targetField) {
        TemporalEntry entry = unwrapTemporalEntry(entity, isMember);
        if (entry == null) {
            return null;
        }
        JsonLite json = extractJsonLite(entry);
        if (json == null) {
            return null;
        }
        if (isMember) {
            Object fieldValue = json.get(targetField);
            double score = getLSScore(toCompare, fieldValue == null ? null : fieldValue.toString());
            json.put(Constants.TEXT_SEARCH_SCORE, score);
            return Double.valueOf(score);
        }
        Object stored = json.get(Constants.TEXT_SEARCH_SCORE);
        if (stored instanceof Number) {
            double score = ((Number) stored).doubleValue();
            return Double.isNaN(score) ? null : Double.valueOf(score);
        }
        return null;
    }

    /**
     * Returns the TemporalEntry behind an entity, or null if there is none.
     * Member entities are the entry itself; non-member entities are
     * IndexableResult wrappers whose value is either the entry or a
     * ResultItem holding a ValueInfo around it.
     */
    private static TemporalEntry unwrapTemporalEntry(Object entity, boolean isMember) {
        Object candidate = entity;
        if (!isMember) {
            if (!(entity instanceof IndexableResult)) {
                return null;
            }
            candidate = ((IndexableResult<Integer, Object>) entity).getValue();
            if (candidate instanceof ResultItem) {
                Object item = ((ResultItem<Object>) candidate).getItem();
                candidate = item instanceof ValueInfo ? ((ValueInfo) item).getValue() : null;
            }
        }
        return candidate instanceof TemporalEntry ? (TemporalEntry) candidate : null;
    }

    /**
     * Returns the JsonLite value held by the entry's GemfireTemporalData, or
     * null if the entry holds a different kind of data or value.
     */
    private static JsonLite extractJsonLite(TemporalEntry entry) {
        ITemporalData temporalData = entry.getTemporalData();
        if (temporalData instanceof GemfireTemporalData) {
            Object value = ((GemfireTemporalData) temporalData).getValue();
            if (value instanceof JsonLite) {
                return (JsonLite) value;
            }
        }
        return null;
    }

    /**
     * Returns the normalized Levenshtein similarity of two strings:
     * <code>1 - distance / max(length)</code>. Identical strings (including
     * two empty strings) score 1.0; a null argument scores 0.0.
     * 
     * @param fromValue
     *            search phrase
     * @param targetFieldValue
     *            value to compare against
     * @return similarity in the range [0, 1]
     */
    public static double getLSScore(String fromValue, String targetFieldValue) {
        if (fromValue == null || targetFieldValue == null) {
            return 0d;
        }
        int maxLen = Math.max(fromValue.length(), targetFieldValue.length());
        if (maxLen == 0) {
            // Both strings are empty; without this guard 0/0 would yield NaN.
            return 1d;
        }
        double lsDistance = StringUtils.getLevenshteinDistance(fromValue, targetFieldValue);
        return 1d - lsDistance / maxLen;
    }

}