io.divolte.server.filesinks.hdfs.FileFlusherLocalHdfsTest.java Source code

Introduction

Here is the source code for io.divolte.server.filesinks.hdfs.FileFlusherLocalHdfsTest.java, a JUnit test that exercises Divolte Collector's FileFlusher through the HDFS file sink, writing Avro files against the local filesystem.

Source

/*
 * Copyright 2014 GoDataDriven B.V.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.divolte.server.filesinks.hdfs;

import static org.junit.Assert.*;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.LongStream;
import java.util.stream.StreamSupport;

import javax.annotation.ParametersAreNonnullByDefault;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericData.Record;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecordBuilder;
import org.apache.avro.io.DatumReader;
import org.apache.commons.lang.mutable.MutableInt;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.ImmutableMap;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;

import io.divolte.server.AvroRecordBuffer;
import io.divolte.server.DivolteIdentifier;
import io.divolte.server.config.FileSinkConfiguration;
import io.divolte.server.config.ValidatedConfiguration;
import io.divolte.server.filesinks.FileFlusher;
import io.divolte.server.processing.Item;

@ParametersAreNonnullByDefault
public class FileFlusherLocalHdfsTest {
    private static final Logger logger = LoggerFactory.getLogger(FileFlusherLocalHdfsTest.class);

    @SuppressWarnings("PMD.AvoidUsingHardCodedIP")
    private static final String ARBITRARY_IP = "8.8.8.8";

    private Schema schema;
    private Path tempInflightDir;
    private Path tempPublishDir;

    private List<Record> records;
    private FileFlusher flusher;

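    // Each test gets a fresh schema plus temporary in-flight and publish directories.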
    @Before
    public void setup() throws IOException {
        schema = schemaFromClassPath("/MinimalRecord.avsc");
        tempInflightDir = Files.createTempDirectory("hdfs-flusher-test-inflight");
        tempPublishDir = Files.createTempDirectory("hdfs-flusher-test-publish");
    }

    @After
    public void teardown() throws IOException {
        schema = null;

        // Walk in reverse order so children are deleted before their parent directories.
        Files.walk(tempInflightDir).sorted(Comparator.reverseOrder()).forEach(this::deleteQuietly);
        tempInflightDir = null;

        Files.walk(tempPublishDir).sorted(Comparator.reverseOrder()).forEach(this::deleteQuietly);
        tempPublishDir = null;

        flusher = null;
        records = null;
    }

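    // With a 1-day roll interval nothing rolls mid-test, so all records should land in a single published file.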
    @Test
    public void shouldCreateAndPopulateFileWithSimpleStrategy() throws IOException {
        setupFlusher("1 day", 10);
        processRecords();

        flusher.cleanup();

        // Fail rather than silently pass when no file was published.
        final Path avroFile = Files.walk(tempPublishDir)
                .filter((p) -> p.toString().endsWith(".avro"))
                .findFirst()
                .orElseThrow(() -> new AssertionError("No Avro file was published."));
        verifyAvroFile(records, schema, avroFile);
    }

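    // In-flight files carry a ".avro.partial" extension so they are not mistaken for complete Avro files.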
    @Test
    public void shouldWriteInProgressFilesWithNonAvroExtension() throws IOException {
        setupFlusher("1 day", 10);
        processRecords();

        assertTrue(Files.walk(tempInflightDir).anyMatch(p -> p.toString().endsWith(".avro.partial")));
    }

    @Test
    public void shouldRollFilesWithSimpleStrategy() throws IOException, InterruptedException {
        setupFlusher("1 second", 5);
        processRecords();

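        // Sleep past the 1-second roll interval in two 500ms steps, sending a
        // heartbeat each time so the flusher notices it is due for a roll.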
        for (int c = 0; c < 2; c++) {
            Thread.sleep(500);
            flusher.heartbeat();
        }

        processRecords();

        flusher.cleanup();

        final MutableInt count = new MutableInt(0);
        Files.walk(tempPublishDir).filter((p) -> p.toString().endsWith(".avro")).forEach((p) -> {
            verifyAvroFile(records, schema, p);
            count.increment();
        });

        assertEquals(2, count.intValue());
    }

    @Test
    public void shouldNotCreateEmptyFiles() throws IOException, InterruptedException {
        setupFlusher("100 millisecond", 5);

        processRecords();

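        // Heartbeat repeatedly across many 100ms roll boundaries while no new
        // records arrive; rolls without records must not produce published files.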
        for (int c = 0; c < 4; c++) {
            Thread.sleep(500);
            flusher.heartbeat();
        }

        processRecords();

        flusher.cleanup();

        final MutableInt count = new MutableInt(0);
        Files.walk(tempPublishDir).filter((p) -> p.toString().endsWith(".avro")).forEach((p) -> {
            verifyAvroFile(records, schema, p);
            count.increment();
        });
        assertEquals(2, count.intValue());
    }

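    // Overlay the given roll interval and the temporary directories onto the test config
    // resources, generate the expected records, and build the flusher under test.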
    private void setupFlusher(final String rollEvery, final int recordCount) throws IOException {
        final Config config = ConfigFactory
                .parseMap(ImmutableMap.of("divolte.sinks.hdfs.file_strategy.roll_every", rollEvery,
                        "divolte.sinks.hdfs.file_strategy.working_dir", tempInflightDir.toString(),
                        "divolte.sinks.hdfs.file_strategy.publish_dir", tempPublishDir.toString()))
                .withFallback(ConfigFactory.parseResources("hdfs-flusher-test.conf"))
                .withFallback(ConfigFactory.parseResources("reference-test.conf"));
        final ValidatedConfiguration vc = new ValidatedConfiguration(() -> config);

        records = LongStream.range(0, recordCount).mapToObj(
                (time) -> new GenericRecordBuilder(schema).set("ts", time).set("remoteHost", ARBITRARY_IP).build())
                .collect(Collectors.toList());

        flusher = new FileFlusher(
                vc.configuration().getSinkConfiguration("hdfs", FileSinkConfiguration.class).fileStrategy,
                HdfsFileManager.newFactory(vc, "hdfs", schema).create());
    }

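    // Wrap each record in an AvroRecordBuffer with generated identifiers and offer it
    // to the flusher, keyed by party id.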
    private void processRecords() {
        records.stream()
                .map((record) -> AvroRecordBuffer.fromRecord(DivolteIdentifier.generate(),
                        DivolteIdentifier.generate(), record))
                .forEach((arb) -> flusher.process(Item.of(0, arb.getPartyId().value, arb)));
    }

    private void deleteQuietly(final Path p) {
        try {
            Files.delete(p);
        } catch (final Exception e) {
            logger.debug("Ignoring failure while deleting file: " + p, e);
        }
    }

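    // Assert that an Avro file contains exactly the expected records, in order.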
    private void verifyAvroFile(final List<Record> expected, final Schema schema, final Path avroFile) {
        // Use try-with-resources so the Avro file handle is always closed.
        try (final DataFileReader<Record> reader = readAvroFile(schema, avroFile.toFile())) {
            final List<Record> result =
                    StreamSupport.stream(reader.spliterator(), false).collect(Collectors.toList());
            assertEquals(expected, result);
        } catch (final IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    private DataFileReader<Record> readAvroFile(final Schema schema, final File file) {
        final DatumReader<Record> dr = new GenericDatumReader<>(schema);
        try {
            return new DataFileReader<>(file, dr);
        } catch (final IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    private Schema schemaFromClassPath(final String resource) throws IOException {
        try (final InputStream resourceStream = this.getClass().getResourceAsStream(resource)) {
            return new Schema.Parser().parse(resourceStream);
        }
    }
}
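
The schema resource the test loads, MinimalRecord.avsc, is not shown on this page. Based purely on the two fields the test populates ("ts" as a long and "remoteHost" as a string), a minimal schema along these lines would satisfy it; the actual file in the Divolte repository may define more:

{
  "type": "record",
  "name": "MinimalRecord",
  "fields": [
    { "name": "ts", "type": "long" },
    { "name": "remoteHost", "type": "string" }
  ]
}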