org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTaskTest.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTaskTest.java, the test class for WriteLineDocTask.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.benchmark.byTask.tasks;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashSet;
import java.util.Properties;
import java.util.Set;

import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.lucene.benchmark.BenchmarkTestCase;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.StreamUtils.Type;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;

/** Tests the functionality of {@link WriteLineDocTask}. */
public class WriteLineDocTaskTest extends BenchmarkTestCase {

    /**
     * Doc maker whose documents carry a body, a title and a date field.
     * Must remain public so that Class.forName.newInstance() can instantiate it.
     */
    public static final class WriteLineDocMaker extends DocMaker {

        /** Returns a new document holding fixed body, title and date values. */
        @Override
        public Document makeDocument() throws Exception {
            final Field.Store notStored = Field.Store.NO;
            final Document result = new Document();
            result.add(new StringField(BODY_FIELD, "body", notStored));
            result.add(new StringField(TITLE_FIELD, "title", notStored));
            result.add(new StringField(DATE_FIELD, "date", notStored));
            return result;
        }

    }

    /**
     * Doc maker whose field values embed CR/LF sequences (and, for the body, a tab).
     * Must remain public so that Class.forName.newInstance() can instantiate it.
     */
    public static final class NewLinesDocMaker extends DocMaker {

        /** Returns a new document whose body, title and date values contain line breaks. */
        @Override
        public Document makeDocument() throws Exception {
            final Document result = new Document();
            final StringField body = new StringField(BODY_FIELD, "body\r\ntext\ttwo", Field.Store.NO);
            result.add(body);
            final StringField title = new StringField(TITLE_FIELD, "title\r\ntext", Field.Store.NO);
            result.add(title);
            final StringField date = new StringField(DATE_FIELD, "date\r\ntext", Field.Store.NO);
            result.add(date);
            return result;
        }

    }

    /**
     * Doc maker whose documents have a title and a date but no body field.
     * Must remain public so that Class.forName.newInstance() can instantiate it.
     */
    public static final class NoBodyDocMaker extends DocMaker {
        /** Returns a new document holding only title and date values. */
        @Override
        public Document makeDocument() throws Exception {
            final Document bodyless = new Document();
            bodyless.add(new StringField(TITLE_FIELD, "title", Field.Store.NO));
            bodyless.add(new StringField(DATE_FIELD, "date", Field.Store.NO));
            return bodyless;
        }
    }

    /**
     * Doc maker whose documents have a body and a date but no title field.
     * Must remain public so that Class.forName.newInstance() can instantiate it.
     */
    public static final class NoTitleDocMaker extends DocMaker {
        /** Returns a new document holding only body and date values. */
        @Override
        public Document makeDocument() throws Exception {
            final Document untitled = new Document();
            untitled.add(new StringField(BODY_FIELD, "body", Field.Store.NO));
            untitled.add(new StringField(DATE_FIELD, "date", Field.Store.NO));
            return untitled;
        }
    }

    /**
     * Doc maker whose documents contain nothing but a date field.
     * Must remain public so that Class.forName.newInstance() can instantiate it.
     */
    public static final class JustDateDocMaker extends DocMaker {
        /** Returns a new document holding a single date value. */
        @Override
        public Document makeDocument() throws Exception {
            final Document dateOnly = new Document();
            final StringField date = new StringField(DATE_FIELD, "date", Field.Store.NO);
            dateOnly.add(date);
            return dateOnly;
        }
    }

    /**
     * Produces the same date-only documents as JustDateDocMaker, but is treated as legal:
     * createPerfRunData recognises this class name and configures the date field as both
     * the line field and the sufficient field.
     * Must remain public so that Class.forName.newInstance() can instantiate it.
     */
    public static final class LegalJustDateDocMaker extends DocMaker {
        /** Returns a new document holding a single date value. */
        @Override
        public Document makeDocument() throws Exception {
            final Document dateOnly = new Document();
            final StringField date = new StringField(DATE_FIELD, "date", Field.Store.NO);
            dateOnly.add(date);
            return dateOnly;
        }
    }

    /**
     * Doc maker whose documents carry no fields at all.
     * Must remain public so that Class.forName.newInstance() can instantiate it.
     */
    public static final class EmptyDocMaker extends DocMaker {
        /** Returns a new document to which no field has been added. */
        @Override
        public Document makeDocument() throws Exception {
            final Document fieldless = new Document();
            return fieldless;
        }
    }

    /**
     * Doc maker that tags every field value with the name of the calling thread, so a test can
     * tell which thread produced a given line.
     * Must remain public so that Class.forName.newInstance() can instantiate it.
     */
    public static final class ThreadingDocMaker extends DocMaker {

        /** Returns a new document whose body, title and date end with the current thread's name. */
        @Override
        public Document makeDocument() throws Exception {
            final String threadName = Thread.currentThread().getName();
            final Document tagged = new Document();
            tagged.add(new StringField(BODY_FIELD, "body_" + threadName, Field.Store.NO));
            tagged.add(new StringField(TITLE_FIELD, "title_" + threadName, Field.Store.NO));
            tagged.add(new StringField(DATE_FIELD, "date_" + threadName, Field.Store.NO));
            return tagged;
        }

    }

    /** Shared factory that doReadTest uses to open bzip2 and gzip files through a compressor input stream. */
    private static final CompressorStreamFactory csFactory = new CompressorStreamFactory();

    /**
     * Assembles the benchmark configuration for one test and wraps it in a {@link PerfRunData}.
     *
     * @param file           the line file to write; its absolute path becomes "line.file.out"
     * @param allowEmptyDocs when true, "sufficient.fields" is set to "," (testEmptyDoc relies on
     *                       this for a document without fields to be written)
     * @param docMakerName   fully qualified class name stored as "doc.maker"
     * @return run data built from the assembled properties
     * @throws Exception if creating the run data fails
     */
    private PerfRunData createPerfRunData(Path file, boolean allowEmptyDocs, String docMakerName) throws Exception {
        final Properties settings = new Properties();
        settings.setProperty("doc.maker", docMakerName);
        final String outputPath = file.toAbsolutePath().toString();
        settings.setProperty("line.file.out", outputPath);
        // Avoid accidentally ending up with a file-system directory.
        settings.setProperty("directory", "RAMDirectory");
        if (allowEmptyDocs) {
            settings.setProperty("sufficient.fields", ",");
        }
        // The "legal" date-only maker gets the date field as its only line field and as the
        // sufficient field; this deliberately overrides the allowEmptyDocs setting above.
        final String legalJustDateName = LegalJustDateDocMaker.class.getName();
        if (docMakerName.equals(legalJustDateName)) {
            settings.setProperty("line.fields", DocMaker.DATE_FIELD);
            settings.setProperty("sufficient.fields", DocMaker.DATE_FIELD);
        }
        return new PerfRunData(new Config(settings));
    }

    /**
     * Reads a line file back and verifies that it holds the header line followed by exactly one
     * document line with the expected field values.
     *
     * @param file     the line file to read
     * @param fileType how the file is encoded; BZIP2 and GZIP content is read through a
     *                 compressor input stream, PLAIN content is read as is
     * @param expTitle expected first field of the document line
     * @param expDate  expected second field of the document line
     * @param expBody  expected third field, or {@code null} when the line should split into only
     *                 two parts
     * @throws Exception if the file cannot be opened, decompressed or read
     */
    private void doReadTest(Path file, Type fileType, String expTitle, String expDate, String expBody)
            throws Exception {
        // The raw stream is a resource of its own so that it is closed even when wrapping it in a
        // compressor stream fails. Closing it a second time after the reader is closed is harmless.
        try (InputStream raw = Files.newInputStream(file)) {
            InputStream in;
            switch (fileType) {
            case BZIP2:
                in = csFactory.createCompressorInputStream(CompressorStreamFactory.BZIP2, raw);
                break;
            case GZIP:
                in = csFactory.createCompressorInputStream(CompressorStreamFactory.GZIP, raw);
                break;
            case PLAIN:
                in = raw; // nothing to do
                break;
            default:
                throw new AssertionError("Unknown file type!"); // fail, should not happen
            }
            try (BufferedReader br = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
                String line = br.readLine();
                assertHeaderLine(line);
                line = br.readLine();
                assertNotNull(line);
                // String.split drops trailing empty strings, which is why a line without a body
                // is expected to yield two parts rather than three.
                String[] parts = line.split(Character.toString(WriteLineDocTask.SEP));
                int numExpParts = expBody == null ? 2 : 3;
                assertEquals(numExpParts, parts.length);
                assertEquals(expTitle, parts[0]);
                assertEquals(expDate, parts[1]);
                if (expBody != null) {
                    assertEquals(expBody, parts[2]);
                }
                // Exactly one document line is expected after the header.
                assertNull(br.readLine());
            }
        }
    }

    /**
     * Asserts that the given line is a header line, i.e. that it starts with
     * {@link WriteLineDocTask#FIELDS_HEADER_INDICATOR}.
     *
     * @param line the first line read from a line file; {@code null} (an empty file) fails the
     *             assertion instead of raising a NullPointerException
     */
    static void assertHeaderLine(String line) {
        assertTrue("First line should be a header line",
                line != null && line.startsWith(WriteLineDocTask.FIELDS_HEADER_INDICATOR));
    }

    /**
     * Tests WriteLineDocTask with a bzip2 format: writes one document to a ".bz2" file and reads
     * it back through a bzip2 decompressor.
     */
    public void testBZip2() throws Exception {

        // Create a document in bz2 format.
        Path file = getWorkDir().resolve("one-line.bz2");
        PerfRunData runData = createPerfRunData(file, false, WriteLineDocMaker.class.getName());
        WriteLineDocTask wldt = new WriteLineDocTask(runData);
        try {
            wldt.doLogic();
        } finally {
            // Close even when doLogic() fails so that whatever the task holds open is released.
            wldt.close();
        }

        doReadTest(file, Type.BZIP2, "title", "date", "body");
    }

    /**
     * Tests WriteLineDocTask with a gzip format: writes one document to a ".gz" file and reads
     * it back through a gzip decompressor.
     */
    public void testGZip() throws Exception {

        // Create a document in gz format.
        Path file = getWorkDir().resolve("one-line.gz");
        PerfRunData runData = createPerfRunData(file, false, WriteLineDocMaker.class.getName());
        WriteLineDocTask wldt = new WriteLineDocTask(runData);
        try {
            wldt.doLogic();
        } finally {
            // Close even when doLogic() fails so that whatever the task holds open is released.
            wldt.close();
        }

        doReadTest(file, Type.GZIP, "title", "date", "body");
    }

    /** Writes one document to a file without a compression extension and reads it back as plain text. */
    public void testRegularFile() throws Exception {

        // Create a document in regular format.
        Path file = getWorkDir().resolve("one-line");
        PerfRunData runData = createPerfRunData(file, false, WriteLineDocMaker.class.getName());
        WriteLineDocTask wldt = new WriteLineDocTask(runData);
        try {
            wldt.doLogic();
        } finally {
            // Close even when doLogic() fails so that whatever the task holds open is released.
            wldt.close();
        }

        doReadTest(file, Type.PLAIN, "title", "date", "body");
    }

    /**
     * Checks that tabs and line breaks inside field values are each written as a single space.
     */
    public void testCharsReplace() throws Exception {
        // WriteLineDocTask used to replace only \t characters w/ a space, since that's its
        // separator char. However, it didn't replace newline characters, which
        // resulted in errors in LineDocSource.
        Path file = getWorkDir().resolve("one-line");
        PerfRunData runData = createPerfRunData(file, false, NewLinesDocMaker.class.getName());
        WriteLineDocTask wldt = new WriteLineDocTask(runData);
        try {
            wldt.doLogic();
        } finally {
            // Close even when doLogic() fails so that whatever the task holds open is released.
            wldt.close();
        }

        doReadTest(file, Type.PLAIN, "title text", "date text", "body text two");
    }

    /** Checks that a document with a title and a date but no body is still written. */
    public void testEmptyBody() throws Exception {
        // WriteLineDocTask used to throw away documents w/ no BODY element, even if they
        // had a TITLE element (LUCENE-1755). It should throw away documents only if they
        // have neither BODY nor TITLE.
        Path file = getWorkDir().resolve("one-line");
        PerfRunData runData = createPerfRunData(file, false, NoBodyDocMaker.class.getName());
        WriteLineDocTask wldt = new WriteLineDocTask(runData);
        try {
            wldt.doLogic();
        } finally {
            // Close even when doLogic() fails so that whatever the task holds open is released.
            wldt.close();
        }

        doReadTest(file, Type.PLAIN, "title", "date", null);
    }

    /**
     * Checks that a document with a body and a date but no title is written with an empty
     * title field in first position.
     */
    public void testEmptyTitle() throws Exception {
        Path file = getWorkDir().resolve("one-line");
        PerfRunData runData = createPerfRunData(file, false, NoTitleDocMaker.class.getName());
        WriteLineDocTask wldt = new WriteLineDocTask(runData);
        try {
            wldt.doLogic();
        } finally {
            // Close even when doLogic() fails so that whatever the task holds open is released.
            wldt.close();
        }

        doReadTest(file, Type.PLAIN, "", "date", "body");
    }

    /**
     * With the default configuration a document that has only a date is not written: the file
     * must contain the header line and nothing else.
     */
    public void testJustDate() throws Exception {
        Path file = getWorkDir().resolve("one-line");
        PerfRunData runData = createPerfRunData(file, false, JustDateDocMaker.class.getName());
        WriteLineDocTask wldt = new WriteLineDocTask(runData);
        try {
            wldt.doLogic();
        } finally {
            // Close even when doLogic() fails so that whatever the task holds open is released.
            wldt.close();
        }

        try (BufferedReader br = Files.newBufferedReader(file, StandardCharsets.UTF_8)) {
            String line = br.readLine();
            assertHeaderLine(line);
            // No document line may follow the header.
            line = br.readLine();
            assertNull(line);
        }
    }

    /**
     * When the date field is configured as both the line field and the sufficient field
     * (see createPerfRunData), a date-only document is written after the header line.
     */
    public void testLegalJustDate() throws Exception {
        Path file = getWorkDir().resolve("one-line");
        PerfRunData runData = createPerfRunData(file, false, LegalJustDateDocMaker.class.getName());
        WriteLineDocTask wldt = new WriteLineDocTask(runData);
        try {
            wldt.doLogic();
        } finally {
            // Close even when doLogic() fails so that whatever the task holds open is released.
            wldt.close();
        }

        try (BufferedReader br = Files.newBufferedReader(file, StandardCharsets.UTF_8)) {
            String line = br.readLine();
            assertHeaderLine(line);
            // A document line must follow the header.
            line = br.readLine();
            assertNotNull(line);
        }
    }

    /**
     * With empty documents allowed (createPerfRunData sets "sufficient.fields" to ","), a
     * document without any field still produces a line after the header.
     */
    public void testEmptyDoc() throws Exception {
        Path file = getWorkDir().resolve("one-line");
        PerfRunData runData = createPerfRunData(file, true, EmptyDocMaker.class.getName());
        WriteLineDocTask wldt = new WriteLineDocTask(runData);
        try {
            wldt.doLogic();
        } finally {
            // Close even when doLogic() fails so that whatever the task holds open is released.
            wldt.close();
        }

        try (BufferedReader br = Files.newBufferedReader(file, StandardCharsets.UTF_8)) {
            String line = br.readLine();
            assertHeaderLine(line);
            // A document line must follow the header.
            line = br.readLine();
            assertNotNull(line);
        }
    }

    /**
     * Calls doLogic() on one shared task from ten threads and checks that the file holds a single
     * header line plus exactly one line per thread, each line carrying the same thread tag in all
     * three of its fields.
     */
    public void testMultiThreaded() throws Exception {
        Path file = getWorkDir().resolve("one-line");
        PerfRunData runData = createPerfRunData(file, false, ThreadingDocMaker.class.getName());
        final WriteLineDocTask wldt = new WriteLineDocTask(runData);
        Thread[] threads = new Thread[10];
        for (int i = 0; i < threads.length; i++) {
            threads[i] = new Thread("t" + i) {
                @Override
                public void run() {
                    try {
                        wldt.doLogic();
                    } catch (Exception e) {
                        throw new RuntimeException(e);
                    }
                }
            };
        }

        for (Thread t : threads) {
            t.start();
        }
        for (Thread t : threads) {
            t.join();
        }

        // Closed only after every worker has been joined, so the task is never closed under a
        // thread that is still writing.
        wldt.close();

        Set<String> ids = new HashSet<>();
        try (BufferedReader br = Files.newBufferedReader(file, StandardCharsets.UTF_8)) {
            String line = br.readLine();
            assertHeaderLine(line); // header line is written once, no matter how many threads there are
            for (int i = 0; i < threads.length; i++) {
                line = br.readLine();
                if (line == null) {
                    // Report a short file as a test failure rather than a NullPointerException.
                    throw new AssertionError(
                            "expected " + threads.length + " document lines but found only " + i);
                }
                String[] parts = line.split(Character.toString(WriteLineDocTask.SEP));
                assertEquals(3, parts.length);
                // check that all thread names written are the same in the same line
                // (the tag is everything from the underscore on, e.g. "_t3")
                String tname = parts[0].substring(parts[0].indexOf('_'));
                ids.add(tname);
                assertEquals(tname, parts[1].substring(parts[1].indexOf('_')));
                assertEquals(tname, parts[2].substring(parts[2].indexOf('_')));
            }
            // only threads.length lines should exist
            assertNull(br.readLine());
            // every thread must have contributed its own line
            assertEquals(threads.length, ids.size());
        }
    }
}