com.basistech.lucene.tools.LuceneQueryToolTest.java Source code

Java tutorial

Introduction

Here is the source code for com.basistech.lucene.tools.LuceneQueryToolTest.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.basistech.lucene.tools;

import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.junit.BeforeClass;
import org.junit.Test;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.util.List;
import java.util.regex.Pattern;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

public class LuceneQueryToolTest {
    private static DirectoryReader reader;
    private static boolean showOutput;

    @BeforeClass
    public static void oneTimeSetup() throws IOException, ParseException {
        LuceneQueryToolTest.showOutput = false; // for debugging tests
        Directory dir = new RAMDirectory();
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45, analyzer);
        IndexWriter writer = new IndexWriter(dir, config);
        Document doc = new Document();
        doc.add(new Field("longest-mention", "Bill Clinton", StringField.TYPE_STORED));
        doc.add(new Field("context", "Hillary Clinton Arkansas", TextField.TYPE_NOT_STORED));
        writer.addDocument(doc);
        doc = new Document();
        doc.add(new Field("longest-mention", "George W. Bush", StringField.TYPE_STORED));
        doc.add(new Field("context", "Texas Laura Bush", TextField.TYPE_NOT_STORED));
        writer.addDocument(doc);
        doc = new Document();
        doc.add(new Field("longest-mention", "George H. W. Bush", StringField.TYPE_STORED));
        doc.add(new Field("context", "Barbara Bush Texas", TextField.TYPE_NOT_STORED));
        writer.addDocument(doc);
        doc = new Document();
        doc.add(new Field("bbb", "foo", StringField.TYPE_STORED));
        doc.add(new Field("bbb", "bar", StringField.TYPE_STORED));
        doc.add(new Field("aaa", "foo", StringField.TYPE_STORED));
        FieldType typeUnindexed = new FieldType(StringField.TYPE_STORED);
        typeUnindexed.setIndexed(false);
        doc.add(new Field("zzz", "foo", typeUnindexed));
        writer.addDocument(doc);
        writer.close();
        reader = DirectoryReader.open(dir);
    }

    private List<String> getOutput(ByteArrayOutputStream bytes) throws UnsupportedEncodingException {
        List<String> lines = Lists.newArrayList();
        for (String line : bytes.toString("UTF-8").split(System.getProperty("line.separator"))) {
            if (showOutput) {
                System.out.println(line);
            }
            if (!line.isEmpty()) {
                lines.add(line);
            }
        }
        return lines;
    }

    private boolean hasLineStartingWith(String prefix, List<String> lines) {
        for (String line : lines) {
            if (line.startsWith(prefix)) {
                return true;
            }
        }
        return false;
    }

    @Test
    public void testQueryAll() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.run(new String[] { "%all" });
        List<String> lines = getOutput(bytes);
        assertTrue(lines.contains("longest-mention: Bill Clinton"));
        assertTrue(lines.contains("longest-mention: George W. Bush"));
        assertTrue(lines.contains("longest-mention: George H. W. Bush"));
    }

    @Test
    public void testQueryStoredStringField() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.run(new String[] { "longest-mention:Bill\\ Clinton" });
        List<String> lines = getOutput(bytes);
        assertTrue(lines.contains("longest-mention: Bill Clinton"));
    }

    @Test
    public void testQueryStoredStringFieldWrongCase() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.run(new String[] { "longest-mention:bill\\ clinton" });
        List<String> lines = getOutput(bytes);
        assertEquals(0, lines.size());
    }

    @Test
    public void testQueryAnalyzedField() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.run(new String[] { "context:bush" });
        List<String> lines = getOutput(bytes);
        assertEquals(2, lines.size());
        assertTrue(lines.contains("longest-mention: George W. Bush"));
        assertTrue(lines.contains("longest-mention: George H. W. Bush"));
    }

    @Test
    public void testDefaultField() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.setDefaultField("context");
        lqt.run(new String[] { "bush" });
        List<String> lines = getOutput(bytes);
        assertEquals(2, lines.size());
        assertTrue(lines.contains("longest-mention: George W. Bush"));
        assertTrue(lines.contains("longest-mention: George H. W. Bush"));
    }

    @Test
    public void testQueryLimit() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.setShowHits(true);
        lqt.setQueryLimit(1);
        lqt.run(new String[] { "context:bush" });
        List<String> lines = getOutput(bytes);
        assertEquals(2, lines.size());
        assertTrue(lines.contains("totalHits: 2"));
        assertTrue(lines.contains("longest-mention: George W. Bush"));
    }

    @Test
    public void testOutputLimit() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.setOutputLimit(1);
        lqt.setShowId(true);
        lqt.setRegex("aaa", Pattern.compile("foo"));
        lqt.run(new String[] { "%all" });
        List<String> lines = getOutput(bytes);
        assertTrue(lines.contains("<id>: 3"));
    }

    @Test
    public void testSortFields() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.setSortFields(true);
        lqt.run(new String[] { "%ids", "3" });
        List<String> lines = getOutput(bytes);
        assertEquals("aaa: foo", lines.get(0));
        assertEquals("bbb: bar", lines.get(1));
        assertEquals("bbb: foo", lines.get(2));
    }

    @Test
    public void testShowId() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.setShowId(true);
        lqt.run(new String[] { "%all" });
        List<String> lines = getOutput(bytes);
        assertTrue(hasLineStartingWith("<id>:", lines));
    }

    @Test
    public void testShowHits() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.setShowHits(true);
        lqt.run(new String[] { "%all" });
        List<String> lines = getOutput(bytes);
        assertTrue(hasLineStartingWith("totalHits:", lines));
    }

    @Test
    public void testShowScore() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.setShowScore(true);
        lqt.run(new String[] { "%all" });
        List<String> lines = getOutput(bytes);
        assertTrue(hasLineStartingWith("<score>:", lines));
    }

    @Test
    public void testQueryIds() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.setShowId(true);
        lqt.run(new String[] { "%ids", "0", "1", "2" });
        List<String> lines = getOutput(bytes);
        assertTrue(lines.contains("<id>: 0"));
        assertTrue(lines.contains("<id>: 1"));
        assertTrue(lines.contains("<id>: 2"));
    }

    @Test
    public void testRegex() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.setRegex("longest-mention", Pattern.compile(".*Clint.*"));
        lqt.run(new String[] { "%all" });
        List<String> lines = getOutput(bytes);
        assertTrue(lines.contains("longest-mention: Bill Clinton"));
    }

    @Test
    public void testQueryCasedLuceneRegex() throws IOException, ParseException {
        // Requires queryParser.setLowercaseExpandedTerms(false), otherwise
        // can't match uppercase with regex on unanalyzed field!
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.run(new String[] { "longest-mention:/Bill C.*/" });
        List<String> lines = getOutput(bytes);
        assertTrue(lines.contains("longest-mention: Bill Clinton"));
    }

    @Test
    public void testFields() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.setFieldNames(Lists.newArrayList("bbb"));
        lqt.run(new String[] { "%ids", "3" });
        List<String> lines = getOutput(bytes);
        assertEquals(2, lines.size());
        assertTrue(lines.contains("bbb: foo"));
        assertTrue(lines.contains("bbb: bar"));
        // "aaa: foo" should be missing
    }

    @Test
    public void testTabular() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.setFieldNames(Lists.newArrayList("longest-mention"));
        lqt.setTabular(true);
        lqt.run(new String[] { "%ids", "0" });
        List<String> lines = getOutput(bytes);
        assertEquals(2, lines.size());
        assertEquals("longest-mention", lines.get(0));
        assertEquals("Bill Clinton", lines.get(1));
    }

    @Test
    public void testSuppressNames() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.setFieldNames(Lists.newArrayList("longest-mention"));
        lqt.setTabular(true);
        lqt.setSuppressNames(true);
        lqt.run(new String[] { "%ids", "0" });
        List<String> lines = getOutput(bytes);
        assertEquals(1, lines.size());
        assertEquals("Bill Clinton", lines.get(0));
    }

    @Test
    public void testEnumerateFields() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.run(new String[] { "%enumerate-fields" });
        List<String> lines = getOutput(bytes);
        assertEquals(5, lines.size());
        assertTrue(lines.contains("aaa"));
        assertTrue(lines.contains("bbb"));
        assertTrue(lines.contains("context"));
        assertTrue(lines.contains("longest-mention"));
        assertTrue(lines.contains("zzz"));
    }

    @Test
    public void testCountFields() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.run(new String[] { "%count-fields" });
        List<String> lines = getOutput(bytes);
        assertEquals(5, lines.size());
        assertTrue(lines.contains("aaa: 1"));
        assertTrue(lines.contains("bbb: 1"));
        assertTrue(lines.contains("context: 3"));
        assertTrue(lines.contains("longest-mention: 3"));
        assertTrue(lines.contains("zzz: 0")); // 0 because not indexed
    }

    @Test
    public void testEnumerateTerms() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.run(new String[] { "%enumerate-terms", "context" });
        List<String> lines = getOutput(bytes);
        assertEquals(7, lines.size());
        assertTrue(lines.contains("arkansas (1)"));
        assertTrue(lines.contains("barbara (1)"));
        assertTrue(lines.contains("bush (2)"));
        assertTrue(lines.contains("clinton (1)"));
        assertTrue(lines.contains("hillary (1)"));
        assertTrue(lines.contains("laura (1)"));
        assertTrue(lines.contains("texas (2)"));
    }

    @Test
    public void testSetOutput() throws Exception {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader);
        lqt.setOutputStream(out);
        lqt.run(new String[] { "%all" });
        assertTrue(getOutput(bytes).size() > 0);
    }

    @Test
    public void testScript() throws Exception {
        List<String> lines = Lists.newArrayList("-q context:bush -o target/out1",
                "-q context:clinton -o target/out2");
        FileUtils.writeLines(new File("target/script.txt"), lines);
        LuceneQueryTool lqt = new LuceneQueryTool(reader);
        lqt.run(new String[] { "%script", "target/script.txt" });
        lines = FileUtils.readLines(new File("target/out1"), "UTF-8");
        assertTrue(Joiner.on(",").join(lines).contains("George"));
        lines = FileUtils.readLines(new File("target/out2"), "UTF-8");
        assertTrue(Joiner.on(",").join(lines).contains("Bill"));
    }

    @Test(expected = RuntimeException.class)
    public void testTabularMultivalued() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.setFieldNames(Lists.newArrayList("bbb"));
        lqt.setTabular(true);
        lqt.run(new String[] { "%ids", "3" });
    }

    @Test(expected = RuntimeException.class)
    public void testInvalidAnalyzer() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.setAnalyzer("InvalidAnalyzer");
    }

    @Test(expected = RuntimeException.class)
    public void testInvalidDefaultField() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.setDefaultField("longest-mentionn");
    }

    @Test(expected = RuntimeException.class)
    public void testInvalidFields() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.setFieldNames(Lists.newArrayList("longest-mentionn"));
    }

    @Test(expected = RuntimeException.class)
    public void testInvalidQuery() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.run(new String[] { "longest-mentionn:Bill\\ Clinton" });
    }

    @Test(expected = RuntimeException.class)
    public void testInvalidRegex() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.setRegex("longest-mentionn", Pattern.compile(".*Clint"));
    }

    @Test(expected = RuntimeException.class)
    public void testRegexWithMissingField() throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.setFieldNames(Lists.newArrayList("context"));
        lqt.setRegex("longest-mention", Pattern.compile(".*Clint.*"));
    }

    @Test(expected = RuntimeException.class)
    public void testUnindexedEnumerateTerms() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.run(new String[] { "%enumerate-terms", "zzz" });
    }

    @Test(expected = RuntimeException.class)
    public void testInvalidEnumerateTerms() throws IOException, ParseException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
        lqt.run(new String[] { "%enumerate-terms", "longest-mentionn" });
    }
}