net.sf.katta.integrationTest.lib.lucene.LuceneComplianceTest.java Source code

Java tutorial

Introduction

Here is the source code for net.sf.katta.integrationTest.lib.lucene.LuceneComplianceTest.java

Source

/**
 * Copyright 2008 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package net.sf.katta.integrationTest.lib.lucene;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import net.sf.katta.client.DeployClient;
import net.sf.katta.client.IDeployClient;
import net.sf.katta.client.IndexState;
import net.sf.katta.integrationTest.support.AbstractIntegrationTest;
import net.sf.katta.lib.lucene.Hit;
import net.sf.katta.lib.lucene.Hits;
import net.sf.katta.lib.lucene.ILuceneClient;
import net.sf.katta.lib.lucene.LuceneClient;
import net.sf.katta.util.FileUtil;

import org.apache.hadoop.io.WritableComparable;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.AfterClass;
import org.junit.Test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

/**
 * Test common lucene operations on sharded indices through katta interface
 * versus pure lucene interface one big index.
 * <p>
 * The same documents are written twice: once split over two shards that are
 * deployed to katta as a single index, and once into one combined plain
 * lucene index. Each query is then executed against both and the total hit
 * counts, scores and sort field values are compared.
 */
public class LuceneComplianceTest extends AbstractIntegrationTest {

    /** Name of the single text field every generated document carries. */
    private static final String FIELD_NAME = "text";

    /** Query terms run by the score-sort test and the first part of the field-sort test. */
    private static final String[] QUERY_TERMS = { "0", "1", "2", "15", "23", "2 23", "nothing" };

    private static ILuceneClient _client;

    // index related fields
    private static File _kattaIndex;
    private static File _luceneIndex;
    private static List<Document> _documents1;
    private static List<Document> _documents2;

    public LuceneComplianceTest() {
        // NOTE(review): arguments are defined by AbstractIntegrationTest;
        // presumably the first one is the number of nodes - confirm there.
        super(2, false, false);
    }

    /**
     * Builds the two katta shards and the combined lucene index, deploys the
     * sharded index to katta and creates the client used by all tests.
     */
    @Override
    protected void afterClusterStart() throws Exception {
        IDeployClient deployClient = new DeployClient(_miniCluster.getProtocol());
        // generate 3 indices (2 shards + one combined index)
        // createTempFile is only used to obtain unique names; the files are
        // deleted right away so that directories can be created in their place
        _luceneIndex = File.createTempFile(LuceneComplianceTest.class.getSimpleName(), "-lucene");
        _kattaIndex = File.createTempFile(LuceneComplianceTest.class.getSimpleName(), "-katta");
        _kattaIndex.delete();
        _luceneIndex.delete();
        File shard1 = new File(_kattaIndex, "shard1");
        File shard2 = new File(_kattaIndex, "shard2");
        _documents1 = createSimpleNumberDocuments(FIELD_NAME, 123);
        _documents2 = createSimpleNumberDocuments(FIELD_NAME, 78);

        writeIndex(shard1, _documents1);
        writeIndex(shard2, _documents2);
        writeIndex(_luceneIndex, combineDocuments(_documents1, _documents2));

        // deploy the index (consisting of the 2 shards) to katta
        deployIndexToKatta(deployClient, _kattaIndex, 2);
        _client = new LuceneClient(_miniCluster.getZkConfiguration());
    }

    /**
     * Closes the katta client and removes the temporary index folders.
     * Everything is null-guarded so that a failed cluster start does not
     * surface here as a NullPointerException hiding the original failure.
     */
    @AfterClass
    public static void afterClass() {
        try {
            if (_client != null) {
                _client.close();
            }
        } finally {
            if (_kattaIndex != null) {
                FileUtil.deleteFolder(_kattaIndex);
            }
            if (_luceneIndex != null) {
                FileUtil.deleteFolder(_luceneIndex);
            }
        }
    }

    /** Compares katta and lucene results when hits are ordered by score. */
    @Test
    public void testScoreSort() throws Exception {
        IndexSearcher indexSearcher = openLuceneSearcher();
        try {
            checkAllQueryTerms(indexSearcher, null);
        } finally {
            indexSearcher.close();
        }
    }

    /** Compares katta and lucene results when hits are ordered by a field sort. */
    @Test
    public void testFieldSort() throws Exception {
        IndexSearcher indexSearcher = openLuceneSearcher();
        try {
            // query and compare with a long-typed sort over all query terms
            checkAllQueryTerms(indexSearcher, newFieldSort(SortField.LONG));

            // check the individual explicit sort types with a single query
            int[] sortTypes = { SortField.BYTE, SortField.INT, SortField.LONG };
            for (int sortType : sortTypes) {
                checkQueryResults(indexSearcher, _kattaIndex.getName(), FIELD_NAME, "1", newFieldSort(sortType));
            }
        } finally {
            indexSearcher.close();
        }
    }

    /** Opens a searcher on the combined lucene index; the caller has to close it. */
    private static IndexSearcher openLuceneSearcher() throws IOException {
        return new IndexSearcher(FSDirectory.open(_luceneIndex.getAbsoluteFile()));
    }

    /** Creates a sort on {@link #FIELD_NAME} using the given {@link SortField} type constant. */
    private static Sort newFieldSort(int sortFieldType) {
        return new Sort(new SortField[] { new SortField(FIELD_NAME, sortFieldType) });
    }

    /** Runs {@link #QUERY_TERMS} in order against both indices using the given sort (may be null). */
    private void checkAllQueryTerms(IndexSearcher indexSearcher, Sort sort) throws Exception {
        for (String queryTerm : QUERY_TERMS) {
            checkQueryResults(indexSearcher, _kattaIndex.getName(), FIELD_NAME, queryTerm, sort);
        }
    }

    /**
     * Checks one query twice: once with a limit large enough to return all
     * documents and once with a limit of half the total document count.
     *
     * @param sort the sort to apply, or null to order by score
     */
    private void checkQueryResults(IndexSearcher indexSearcher, String kattaIndexName, String fieldName,
            String queryTerm, Sort sort) throws Exception {
        // check all documents
        checkQueryResults(indexSearcher, kattaIndexName, fieldName, queryTerm, Short.MAX_VALUE, sort);

        // check top n documents
        checkQueryResults(indexSearcher, kattaIndexName, fieldName, queryTerm,
                (_documents1.size() + _documents2.size()) / 2, sort);
    }

    /**
     * Executes the query against the plain lucene index and against katta and
     * asserts that both report the same total hit count and, per returned
     * lucene hit, the same score (unsorted) or the same sort field values
     * (sorted, compared through their string representation).
     *
     * @param resultCount maximum number of hits to request from both sides
     * @param sort the sort to apply, or null to order by score
     */
    @SuppressWarnings("unchecked")
    private void checkQueryResults(IndexSearcher indexSearcher, String kattaIndexName, String fieldName,
            String queryTerm, int resultCount, Sort sort) throws Exception {

        // NOTE(review): for multi-word terms such as "2 23" presumably only the
        // first word is scoped to fieldName and the rest goes to the empty
        // default field - confirm against the query parser syntax.
        final Query query = new QueryParser(Version.LUCENE_35, "", new KeywordAnalyzer())
                .parse(fieldName + ": " + queryTerm);
        final String[] indexNames = new String[] { kattaIndexName };
        final TopDocs searchResultsLucene;
        final Hits searchResultsKatta;
        if (sort == null) {
            searchResultsLucene = indexSearcher.search(query, resultCount);
            searchResultsKatta = _client.search(query, indexNames, resultCount);
        } else {
            searchResultsLucene = indexSearcher.search(query, null, resultCount, sort);
            searchResultsKatta = _client.search(query, indexNames, resultCount, sort);
        }

        assertEquals(searchResultsLucene.totalHits, searchResultsKatta.size());

        ScoreDoc[] scoreDocs = searchResultsLucene.scoreDocs;
        List<Hit> hits = searchResultsKatta.getHits();
        if (sort == null) {
            for (int i = 0; i < scoreDocs.length; i++) {
                assertEquals(scoreDocs[i].score, hits.get(i).getScore(), 0.0);
            }
        } else {
            for (int i = 0; i < scoreDocs.length; i++) {
                // a sorted search is expected to deliver FieldDoc instances
                Object[] luceneFields = ((FieldDoc) scoreDocs[i]).fields;
                WritableComparable[] kattaFields = hits.get(i).getSortFields();
                assertEquals(luceneFields.length, kattaFields.length);
                for (int j = 0; j < luceneFields.length; j++) {
                    assertEquals(luceneFields[j].toString(), kattaFields[j].toString());
                }
            }
        }
    }

    /**
     * Creates {@code count} documents whose single field holds the text
     * {@code "<i> <count - i> <2|1>"}, where the last token is 2 for even
     * and 1 for odd {@code i}.
     */
    private static List<Document> createSimpleNumberDocuments(String textFieldName, int count) {
        List<Document> documents = new ArrayList<Document>();
        for (int i = 0; i < count; i++) {
            String fieldContent = i + " " + (count - i);
            if (i % 2 == 0) {
                fieldContent += " 2";
            } else {
                fieldContent += " 1";
            }
            Document document = new Document();
            document.add(new Field(textFieldName, fieldContent, Store.NO, Index.ANALYZED));
            documents.add(document);
        }
        return documents;
    }

    /** Concatenates the given document lists, in argument order, into one new list. */
    private static List<Document> combineDocuments(List<Document>... documentLists) {
        List<Document> list = new ArrayList<Document>();
        for (List<Document> documentsList : documentLists) {
            list.addAll(documentsList);
        }

        return list;
    }

    /**
     * Writes the documents into a newly created, optimized lucene index located
     * in the given folder. The writer is closed even when indexing fails.
     */
    private static void writeIndex(File file, List<Document> documents) throws IOException {
        file.mkdirs();
        assertTrue(file.exists());
        IndexWriter indexWriter = new IndexWriter(FSDirectory.open(file), new StandardAnalyzer(Version.LUCENE_35),
                true, MaxFieldLength.UNLIMITED);
        try {
            for (Document document : documents) {
                indexWriter.addDocument(document);
            }
            indexWriter.optimize();
        } finally {
            indexWriter.close();
        }
    }

    /**
     * Adds the index folder to katta under the folder's name, waits for the
     * deployment to finish and asserts that it ended in state DEPLOYED.
     */
    private static void deployIndexToKatta(IDeployClient deployClient, File file, int replicationLevel)
            throws InterruptedException {
        IndexState indexState = deployClient.addIndex(file.getName(), file.getAbsolutePath(), replicationLevel)
                .joinDeployment();
        assertEquals(IndexState.DEPLOYED, indexState);
    }

}