Java tutorial: testing Divolte's HDFS FileFlusher on the local filesystem
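The complete listing below is FileFlusherLocalHdfsTest from the Divolte collector sources (Apache License 2.0). It exercises the HDFS FileFlusher against ordinary local temporary directories, so no Hadoop cluster is needed. Four behaviours are covered: a simple strategy creates and populates an Avro file; in-flight files carry an .avro.partial extension until they are published; a short roll_every interval causes rolling to a new file on heartbeats; and heartbeats without new records never publish empty files. Along the way it shows how to override Typesafe Config settings with ConfigFactory.parseMap plus withFallback, how to build Avro records with GenericRecordBuilder, and how to verify the written files with DataFileReader.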
/*
 * Copyright 2014 GoDataDriven B.V.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.divolte.server.filesinks.hdfs;

import static org.junit.Assert.*;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.LongStream;
import java.util.stream.StreamSupport;

import javax.annotation.ParametersAreNonnullByDefault;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericData.Record;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecordBuilder;
import org.apache.avro.io.DatumReader;
import org.apache.commons.lang.mutable.MutableInt;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.ImmutableMap;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;

import io.divolte.server.AvroRecordBuffer;
import io.divolte.server.DivolteIdentifier;
import io.divolte.server.config.FileSinkConfiguration;
import io.divolte.server.config.ValidatedConfiguration;
import io.divolte.server.filesinks.FileFlusher;
import io.divolte.server.processing.Item;

@ParametersAreNonnullByDefault
public class FileFlusherLocalHdfsTest {
    private static final Logger logger = LoggerFactory.getLogger(FileFlusherLocalHdfsTest.class);

    @SuppressWarnings("PMD.AvoidUsingHardCodedIP")
    private static final String ARBITRARY_IP = "8.8.8.8";

    private Schema schema;
    private Path tempInflightDir;
    private Path tempPublishDir;

    private List<Record> records;
    private FileFlusher flusher;

    @Before
    public void setup() throws IOException {
        // Fresh scratch directories for every test: in-flight files go to the
        // working dir, completed files to the publish dir.
        schema = schemaFromClassPath("/MinimalRecord.avsc");
        tempInflightDir = Files.createTempDirectory("hdfs-flusher-test-inflight");
        tempPublishDir = Files.createTempDirectory("hdfs-flusher-test-publish");
    }

    @After
    public void teardown() throws IOException {
        schema = null;

        // Recursively delete both scratch directories.
        Files.walk(tempInflightDir).filter((p) -> !p.equals(tempInflightDir)).forEach(this::deleteQuietly);
        deleteQuietly(tempInflightDir);
        tempInflightDir = null;
        Files.walk(tempPublishDir).filter((p) -> !p.equals(tempPublishDir)).forEach(this::deleteQuietly);
        deleteQuietly(tempPublishDir);
        tempPublishDir = null;

        flusher = null;
        records = null;
    }

    @Test
    public void shouldCreateAndPopulateFileWithSimpleStrategy() throws IOException {
        // With a long roll interval, a single batch ends up in one published .avro file.
        setupFlusher("1 day", 10);
        processRecords();
        flusher.cleanup();

        Files.walk(tempPublishDir)
             .filter((p) -> p.toString().endsWith(".avro"))
             .findFirst()
             .ifPresent((p) -> verifyAvroFile(records, schema, p));
    }

    @Test
    public void shouldWriteInProgressFilesWithNonAvroExtension() throws IOException {
        // Files still being written must carry the .avro.partial extension, not .avro.
        setupFlusher("1 day", 10);
        processRecords();

        assertTrue(Files.walk(tempInflightDir)
                        .anyMatch((p) -> p.toString().endsWith(".avro.partial")));
    }

    @Test
    public void shouldRollFilesWithSimpleStrategy() throws IOException, InterruptedException {
        // Heartbeats spanning the 1-second roll interval force a roll, so two
        // batches yield two published files, each holding a full copy of the batch.
        setupFlusher("1 second", 5);
        processRecords();

        for (int c = 0; c < 2; c++) {
            Thread.sleep(500);
            flusher.heartbeat();
        }

        processRecords();
        flusher.cleanup();

        final MutableInt count = new MutableInt(0);
        Files.walk(tempPublishDir).filter((p) -> p.toString().endsWith(".avro")).forEach((p) -> {
            verifyAvroFile(records, schema, p);
            count.increment();
        });

        assertEquals(2, count.intValue());
    }

    @Test
    public void shouldNotCreateEmptyFiles() throws IOException, InterruptedException {
        // With a 100 ms roll interval, the idle heartbeats offer many roll
        // opportunities, but only the two populated files may be published.
        setupFlusher("100 millisecond", 5);
        processRecords();

        for (int c = 0; c < 4; c++) {
            Thread.sleep(500);
            flusher.heartbeat();
        }

        processRecords();
        flusher.cleanup();

        final MutableInt count = new MutableInt(0);
        Files.walk(tempPublishDir).filter((p) -> p.toString().endsWith(".avro")).forEach((p) -> {
            verifyAvroFile(records, schema, p);
            count.increment();
        });

        assertEquals(2, count.intValue());
    }

    private void setupFlusher(final String rollEvery, final int recordCount) throws IOException {
        // Layer the per-test roll interval and directories over the test configuration resources.
        final Config config = ConfigFactory
                .parseMap(ImmutableMap.of(
                        "divolte.sinks.hdfs.file_strategy.roll_every", rollEvery,
                        "divolte.sinks.hdfs.file_strategy.working_dir", tempInflightDir.toString(),
                        "divolte.sinks.hdfs.file_strategy.publish_dir", tempPublishDir.toString()))
                .withFallback(ConfigFactory.parseResources("hdfs-flusher-test.conf"))
                .withFallback(ConfigFactory.parseResources("reference-test.conf"));
        final ValidatedConfiguration vc = new ValidatedConfiguration(() -> config);

        records = LongStream.range(0, recordCount)
                .mapToObj((time) -> new GenericRecordBuilder(schema)
                        .set("ts", time)
                        .set("remoteHost", ARBITRARY_IP)
                        .build())
                .collect(Collectors.toList());

        flusher = new FileFlusher(
                vc.configuration().getSinkConfiguration("hdfs", FileSinkConfiguration.class).fileStrategy,
                HdfsFileManager.newFactory(vc, "hdfs", schema).create());
    }

    private void processRecords() {
        // Wrap each record in an AvroRecordBuffer and feed it to the flusher, keyed by party id.
        records.stream()
               .map((record) -> AvroRecordBuffer.fromRecord(DivolteIdentifier.generate(),
                                                            DivolteIdentifier.generate(),
                                                            record))
               .forEach((arb) -> flusher.process(Item.of(0, arb.getPartyId().value, arb)));
    }

    private void deleteQuietly(final Path p) {
        try {
            Files.delete(p);
        } catch (final Exception e) {
            logger.debug("Ignoring failure while deleting file: " + p, e);
        }
    }

    private void verifyAvroFile(final List<Record> expected, final Schema schema, final Path avroFile) {
        final List<Record> result = StreamSupport
                .stream(readAvroFile(schema, avroFile.toFile()).spliterator(), false)
                .collect(Collectors.toList());
        assertEquals(expected, result);
    }

    private DataFileReader<Record> readAvroFile(final Schema schema, final File file) {
        final DatumReader<Record> dr = new GenericDatumReader<>(schema);
        try {
            return new DataFileReader<>(file, dr);
        } catch (final IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    private Schema schemaFromClassPath(final String resource) throws IOException {
        try (final InputStream resourceStream = this.getClass().getResourceAsStream(resource)) {
            return new Schema.Parser().parse(resourceStream);
        }
    }
}
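The test loads MinimalRecord.avsc from the test classpath; the schema file itself is not shown above. For readers unfamiliar with the Avro plumbing, here is a minimal, self-contained sketch of the write-then-read round trip that verifyAvroFile depends on. The inline schema is an assumption inferred from the two fields the test sets (ts as a long, remoteHost as a string), and the class name AvroRoundTripSketch is hypothetical; Divolte's actual schema resource may differ.

    import java.io.File;
    import java.util.ArrayList;
    import java.util.List;

    import org.apache.avro.Schema;
    import org.apache.avro.file.DataFileReader;
    import org.apache.avro.file.DataFileWriter;
    import org.apache.avro.generic.GenericData.Record;
    import org.apache.avro.generic.GenericDatumReader;
    import org.apache.avro.generic.GenericDatumWriter;
    import org.apache.avro.generic.GenericRecordBuilder;

    public class AvroRoundTripSketch {
        // Assumed shape of MinimalRecord.avsc; the real schema ships with
        // Divolte's test resources and may differ.
        private static final String SCHEMA_JSON =
                "{\"type\": \"record\", \"name\": \"MinimalRecord\", \"fields\": ["
                + "{\"name\": \"ts\", \"type\": \"long\"},"
                + "{\"name\": \"remoteHost\", \"type\": \"string\"}"
                + "]}";

        public static void main(final String[] args) throws Exception {
            final Schema schema = new Schema.Parser().parse(SCHEMA_JSON);
            final File file = File.createTempFile("round-trip", ".avro");

            // Write a couple of records, mirroring what setupFlusher() builds.
            try (DataFileWriter<Record> writer = new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
                writer.create(schema, file);
                for (long ts = 0; ts < 2; ts++) {
                    writer.append(new GenericRecordBuilder(schema)
                            .set("ts", ts)
                            .set("remoteHost", "8.8.8.8")
                            .build());
                }
            }

            // Read the records back, as verifyAvroFile() does via DataFileReader.
            final List<Record> result = new ArrayList<>();
            try (DataFileReader<Record> reader = new DataFileReader<>(file, new GenericDatumReader<Record>(schema))) {
                reader.forEach(result::add);
            }
            System.out.println(result); // Prints both records as JSON-like text.
        }
    }

Note that DataFileReader is Iterable, which is why verifyAvroFile in the test can stream it with StreamSupport.stream(reader.spliterator(), false) and compare the result list against the expected records with a plain assertEquals.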