com.ikanow.aleph2.storage_service_hdfs.services.TestHdfsDataWriteService.java Source code

Introduction

Here is the source code for com.ikanow.aleph2.storage_service_hdfs.services.TestHdfsDataWriteService.java, the JUnit test suite for Aleph2's HDFS data write service (HfdsDataWriteService; the transposed "Hfds" spelling is the class name as it appears in the project).
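
As a quick orientation before the full listing, the sketch below shows the batch-write pattern these tests exercise. It is assembled only from calls that actually appear in the test code (getBatchWriteSubservice, setBatchProperties, storeObject, storeObjects) and reuses the getWriter() helper defined in the listing, so treat it as an illustration rather than canonical setup.

    // Minimal usage sketch, written as if inside the test class below and assuming
    // the enclosing method declares "throws Exception".
    HfdsDataWriteService<TestBean> write_service = getWriter("/test/sketch");

    // The batch sub-service is lazily initialized exactly once:
    IBatchSubservice<TestBean> batch = write_service.getBatchWriteSubservice().get();

    // Parameter roles inferred from test_writerService_end2end: max objects per
    // segment, max segment size in bytes, max flush interval, worker thread count.
    batch.setBatchProperties(Optional.of(1000), Optional.of(1000L),
            Optional.of(Duration.ofSeconds(2L)), Optional.of(3));

    batch.storeObject(new TestBean("id1", "val1")); // fire-and-forget
    batch.storeObjects(Arrays.asList(new TestBean("id2", "val2")));

    // Or go via the write service itself and block on the acknowledgement
    // (the tests assert that cf.get().get() is null on success):
    CompletableFuture<Supplier<Object>> cf = write_service.storeObject(new TestBean("id3", "val3"));
    cf.get().get();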

Source

/*******************************************************************************
* Copyright 2015, The IKANOW Open Source Project.
* 
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* 
*   http://www.apache.org/licenses/LICENSE-2.0
* 
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package com.ikanow.aleph2.storage_service_hdfs.services;

import static org.junit.Assert.*;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.OutputStream;
import java.time.Duration;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import org.apache.commons.io.FileUtils;
import org.junit.Test;

import scala.Tuple2;

import com.fasterxml.jackson.databind.JsonNode;
import com.ikanow.aleph2.data_model.interfaces.data_services.IStorageService;
import com.ikanow.aleph2.data_model.interfaces.shared_services.IDataWriteService.IBatchSubservice;
import com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean;
import com.ikanow.aleph2.data_model.objects.data_import.DataSchemaBean;
import com.ikanow.aleph2.data_model.objects.data_import.DataSchemaBean.StorageSchemaBean;
import com.ikanow.aleph2.data_model.objects.shared.GlobalPropertiesBean;
import com.ikanow.aleph2.data_model.utils.BeanTemplateUtils;

public class TestHdfsDataWriteService {

    /** Get some easy testing out of the way:
     *       HfdsDataWriteService.getSuffix
     */
    @Test
    public void test_utilityMethods_getSuffix() {

        final Date then = new Date(1441311160000L); // Thu, 03 Sep 2015 20:12:40 GMT

        // No storage schema
        {
            final DataBucketBean bucket = BeanTemplateUtils.build(DataBucketBean.class)
                    .with(DataBucketBean::full_name, "/test/static").done().get();

            assertEquals(IStorageService.NO_TIME_SUFFIX,
                    HfdsDataWriteService.getSuffix(then, bucket, IStorageService.StorageStage.raw));
        }
        // No grouping time
        {
            final DataBucketBean test_bucket = BeanTemplateUtils.build(DataBucketBean.class)
                    .with(DataBucketBean::full_name, "/test/static")
                    .with(DataBucketBean::data_schema, BeanTemplateUtils.build(DataSchemaBean.class)
                            .with(DataSchemaBean::storage_schema, BeanTemplateUtils.build(StorageSchemaBean.class)
                                    .with(StorageSchemaBean::raw, BeanTemplateUtils
                                            .build(StorageSchemaBean.StorageSubSchemaBean.class).done().get())
                                    .done().get())
                            .done().get())
                    .done().get();
            assertEquals(IStorageService.NO_TIME_SUFFIX,
                    HfdsDataWriteService.getSuffix(then, test_bucket, IStorageService.StorageStage.raw));
        }
        // Malformed grouping time
        {
            final DataBucketBean test_bucket = BeanTemplateUtils.build(DataBucketBean.class)
                    .with(DataBucketBean::full_name, "/test/static")
                    .with(DataBucketBean::data_schema, BeanTemplateUtils.build(DataSchemaBean.class).with(
                            DataSchemaBean::storage_schema,
                            BeanTemplateUtils.build(StorageSchemaBean.class).with(StorageSchemaBean::json,
                                    BeanTemplateUtils.build(StorageSchemaBean.StorageSubSchemaBean.class)
                                            .with(StorageSchemaBean.StorageSubSchemaBean::grouping_time_period,
                                                    "bananas")
                                            .done().get())
                                    .done().get())
                            .done().get())
                    .done().get();

            assertEquals(IStorageService.NO_TIME_SUFFIX,
                    HfdsDataWriteService.getSuffix(then, test_bucket, IStorageService.StorageStage.json));
        }
        // Valid grouping time
        {
            final DataBucketBean test_bucket = BeanTemplateUtils.build(DataBucketBean.class)
                    .with(DataBucketBean::full_name, "/test/static")
                    .with(DataBucketBean::data_schema, BeanTemplateUtils.build(DataSchemaBean.class).with(
                            DataSchemaBean::storage_schema,
                            BeanTemplateUtils.build(StorageSchemaBean.class).with(StorageSchemaBean::processed,
                                    BeanTemplateUtils.build(StorageSchemaBean.StorageSubSchemaBean.class)
                                            .with(StorageSchemaBean.StorageSubSchemaBean::grouping_time_period,
                                                    "1month")
                                            .done().get())
                                    .done().get())
                            .done().get())
                    .done().get();

            assertEquals("2015-09",
                    HfdsDataWriteService.getSuffix(then, test_bucket, IStorageService.StorageStage.processed));
        }
    }

    /** Get some easy testing out of the way:
     *       HfdsDataWriteService.getBasePath
     */
    @Test
    public void test_utilityMethods_getBasePath() {

        final DataBucketBean bucket = BeanTemplateUtils.build(DataBucketBean.class)
                .with(DataBucketBean::full_name, "/test/static").done().get();
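
        // Expected layout (per the assertions below):
        //   <root><bucket full_name>/managed_bucket/import/stored/<stage>/<suffix>
        // transient_output instead maps to .../managed_bucket/import/transient/<suffix>/<job name>
        // and throws if no job name is supplied.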

        assertEquals("/root/test/static/managed_bucket/import/stored/raw/current/", HfdsDataWriteService
                .getBasePath("/root", bucket, IStorageService.StorageStage.raw, Optional.empty(), "current/"));
        assertEquals("/root/test/static/managed_bucket/import/stored/raw/ping", HfdsDataWriteService
                .getBasePath("/root", bucket, IStorageService.StorageStage.raw, Optional.empty(), "ping"));
        assertEquals("/root/test/static/managed_bucket/import/stored/json/current/", HfdsDataWriteService
                .getBasePath("/root", bucket, IStorageService.StorageStage.json, Optional.empty(), "current/"));
        assertEquals("/root/test/static/managed_bucket/import/stored/json/pong", HfdsDataWriteService
                .getBasePath("/root", bucket, IStorageService.StorageStage.json, Optional.empty(), "pong"));
        assertEquals("/root/test/static/managed_bucket/import/stored/processed/current/",
                HfdsDataWriteService.getBasePath("/root", bucket, IStorageService.StorageStage.processed,
                        Optional.empty(), "current/"));
        assertEquals("/root/test/static/managed_bucket/import/stored/processed/other", HfdsDataWriteService
                .getBasePath("/root", bucket, IStorageService.StorageStage.processed, Optional.empty(), "other"));

        // Transient output:
        try {
            HfdsDataWriteService.getBasePath("/root", bucket, IStorageService.StorageStage.transient_output,
                    Optional.empty(), "current/");
            fail("Should have thrown");
        } catch (Exception e) {
            // expected: transient output requires a job name
        }
        assertEquals("/root/test/static/managed_bucket/import/transient/current/testj-testm",
                HfdsDataWriteService.getBasePath("/root", bucket, IStorageService.StorageStage.transient_output,
                        Optional.of("testj-testm"), "current"));
    }

    /** Get some easy testing out of the way:
     *       HfdsDataWriteService.getExtension
     */
    @Test
    public void test_utilityMethods_getExtension() {
        assertEquals("", HfdsDataWriteService.getExtension(IStorageService.StorageStage.raw));
        assertEquals(".json", HfdsDataWriteService.getExtension(IStorageService.StorageStage.json));
        assertEquals(".json", HfdsDataWriteService.getExtension(IStorageService.StorageStage.processed));
        assertEquals(".json", HfdsDataWriteService.getExtension(IStorageService.StorageStage.transient_output));
    }

    /** Get some easy testing out of the way:
     *       HfdsDataWriteService.getCanonicalCodec
     *       HfdsDataWriteService.wrapOutputInCodec
     */
    @Test
    public void test_utilityMethods_codecs() {
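
        // Canonical codec names exercised below: "gzip" -> "gz", "snappy" -> "sz",
        // "snappy_framed" -> "fr.sz" (and "gz"/"sz"/"fr.sz" map to themselves);
        // unrecognized codecs pass through unchanged and the stream is left unwrapped.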

        // No codec (the codec is set on the 'processed' sub-schema, but the 'raw' stage is queried)
        {
            final DataBucketBean test_bucket = BeanTemplateUtils.build(DataBucketBean.class)
                    .with(DataBucketBean::full_name, "/test/static")
                    .with(DataBucketBean::data_schema, BeanTemplateUtils.build(DataSchemaBean.class)
                            .with(DataSchemaBean::storage_schema, BeanTemplateUtils.build(StorageSchemaBean.class)
                                    .with(StorageSchemaBean::processed,
                                            BeanTemplateUtils.build(StorageSchemaBean.StorageSubSchemaBean.class)
                                                    .with(StorageSchemaBean.StorageSubSchemaBean::codec, "gzip")
                                                    .done().get())
                                    .done().get())
                            .done().get())
                    .done().get();

            OutputStream out_in = new ByteArrayOutputStream();

            Optional<String> test = HfdsDataWriteService.getCanonicalCodec(
                    test_bucket.data_schema().storage_schema(), IStorageService.StorageStage.raw);
            assertEquals(Optional.empty(), test);

            final OutputStream out_out = HfdsDataWriteService.wrapOutputInCodec(test, out_in);
            assertEquals(out_in, out_out);
        }
        // Malformed codec
        {
            final DataBucketBean test_bucket = BeanTemplateUtils.build(DataBucketBean.class)
                    .with(DataBucketBean::full_name, "/test/static")
                    .with(DataBucketBean::data_schema,
                            BeanTemplateUtils.build(DataSchemaBean.class).with(DataSchemaBean::storage_schema,
                                    BeanTemplateUtils.build(StorageSchemaBean.class).with(StorageSchemaBean::json,
                                            BeanTemplateUtils.build(StorageSchemaBean.StorageSubSchemaBean.class)
                                                    .with(StorageSchemaBean.StorageSubSchemaBean::codec, "banana")
                                                    .done().get())
                                            .done().get())
                                    .done().get())
                    .done().get();

            OutputStream out_in = new ByteArrayOutputStream();

            Optional<String> test = HfdsDataWriteService.getCanonicalCodec(
                    test_bucket.data_schema().storage_schema(), IStorageService.StorageStage.json);
            assertEquals(Optional.of("banana"), test);

            final OutputStream out_out = HfdsDataWriteService.wrapOutputInCodec(test, out_in);
            assertEquals(out_in, out_out);
        }
        // gzip
        {
            final DataBucketBean test_bucket = BeanTemplateUtils.build(DataBucketBean.class)
                    .with(DataBucketBean::full_name, "/test/static")
                    .with(DataBucketBean::data_schema,
                            BeanTemplateUtils.build(DataSchemaBean.class).with(DataSchemaBean::storage_schema,
                                    BeanTemplateUtils.build(StorageSchemaBean.class).with(StorageSchemaBean::json,
                                            BeanTemplateUtils.build(StorageSchemaBean.StorageSubSchemaBean.class)
                                                    .with(StorageSchemaBean.StorageSubSchemaBean::codec, "gzip")
                                                    .done().get())
                                            .done().get())
                                    .done().get())
                    .done().get();

            OutputStream out_in = new ByteArrayOutputStream();

            Optional<String> test = HfdsDataWriteService.getCanonicalCodec(
                    test_bucket.data_schema().storage_schema(), IStorageService.StorageStage.json);
            assertEquals(Optional.of("gz"), test);

            final OutputStream out_out = HfdsDataWriteService.wrapOutputInCodec(test, out_in);
            assertTrue("Stream is gzip: " + out_out.getClass().getSimpleName(),
                    out_out instanceof java.util.zip.GZIPOutputStream);
        }
        // gz
        {
            final DataBucketBean test_bucket = BeanTemplateUtils.build(DataBucketBean.class)
                    .with(DataBucketBean::full_name, "/test/static")
                    .with(DataBucketBean::data_schema, BeanTemplateUtils.build(DataSchemaBean.class).with(
                            DataSchemaBean::storage_schema,
                            BeanTemplateUtils.build(StorageSchemaBean.class).with(StorageSchemaBean::json,
                                    BeanTemplateUtils.build(StorageSchemaBean.StorageSubSchemaBean.class)
                                            .with(StorageSchemaBean.StorageSubSchemaBean::codec, "gz").done().get())
                                    .done().get())
                            .done().get())
                    .done().get();

            OutputStream out_in = new ByteArrayOutputStream();

            Optional<String> test = HfdsDataWriteService.getCanonicalCodec(
                    test_bucket.data_schema().storage_schema(), IStorageService.StorageStage.json);
            assertEquals(Optional.of("gz"), test);

            final OutputStream out_out = HfdsDataWriteService.wrapOutputInCodec(test, out_in);
            assertTrue("Stream is gzip: " + out_out.getClass().getSimpleName(),
                    out_out instanceof java.util.zip.GZIPOutputStream);
        }
        // fr.sz
        {
            final DataBucketBean test_bucket = BeanTemplateUtils.build(DataBucketBean.class)
                    .with(DataBucketBean::full_name, "/test/static")
                    .with(DataBucketBean::data_schema,
                            BeanTemplateUtils.build(DataSchemaBean.class).with(DataSchemaBean::storage_schema,
                                    BeanTemplateUtils.build(StorageSchemaBean.class).with(StorageSchemaBean::json,
                                            BeanTemplateUtils.build(StorageSchemaBean.StorageSubSchemaBean.class)
                                                    .with(StorageSchemaBean.StorageSubSchemaBean::codec, "fr.sz")
                                                    .done().get())
                                            .done().get())
                                    .done().get())
                    .done().get();

            OutputStream out_in = new ByteArrayOutputStream();

            Optional<String> test = HfdsDataWriteService.getCanonicalCodec(
                    test_bucket.data_schema().storage_schema(), IStorageService.StorageStage.json);
            assertEquals(Optional.of("fr.sz"), test);

            final OutputStream out_out = HfdsDataWriteService.wrapOutputInCodec(test, out_in);
            assertTrue("Stream is snappy framed: " + out_out.getClass().getSimpleName(),
                    out_out instanceof org.xerial.snappy.SnappyFramedOutputStream);
        }
        // snappy_framed
        {
            final DataBucketBean test_bucket = BeanTemplateUtils.build(DataBucketBean.class)
                    .with(DataBucketBean::full_name, "/test/static")
                    .with(DataBucketBean::data_schema,
                            BeanTemplateUtils.build(DataSchemaBean.class).with(DataSchemaBean::storage_schema,
                                    BeanTemplateUtils.build(StorageSchemaBean.class).with(StorageSchemaBean::json,
                                            BeanTemplateUtils.build(StorageSchemaBean.StorageSubSchemaBean.class)
                                                    .with(StorageSchemaBean.StorageSubSchemaBean::codec,
                                                            "snappy_framed")
                                                    .done().get())
                                            .done().get())
                                    .done().get())
                    .done().get();

            OutputStream out_in = new ByteArrayOutputStream();

            Optional<String> test = HfdsDataWriteService.getCanonicalCodec(
                    test_bucket.data_schema().storage_schema(), IStorageService.StorageStage.json);
            assertEquals(Optional.of("fr.sz"), test);

            final OutputStream out_out = HfdsDataWriteService.wrapOutputInCodec(test, out_in);
            assertTrue("Stream is snappy framed: " + out_out.getClass().getSimpleName(),
                    out_out instanceof org.xerial.snappy.SnappyFramedOutputStream);
        }
        // sz
        {
            final DataBucketBean test_bucket = BeanTemplateUtils.build(DataBucketBean.class)
                    .with(DataBucketBean::full_name, "/test/static")
                    .with(DataBucketBean::data_schema, BeanTemplateUtils.build(DataSchemaBean.class).with(
                            DataSchemaBean::storage_schema,
                            BeanTemplateUtils.build(StorageSchemaBean.class).with(StorageSchemaBean::json,
                                    BeanTemplateUtils.build(StorageSchemaBean.StorageSubSchemaBean.class)
                                            .with(StorageSchemaBean.StorageSubSchemaBean::codec, "sz").done().get())
                                    .done().get())
                            .done().get())
                    .done().get();

            OutputStream out_in = new ByteArrayOutputStream();

            Optional<String> test = HfdsDataWriteService.getCanonicalCodec(
                    test_bucket.data_schema().storage_schema(), IStorageService.StorageStage.json);
            assertEquals(Optional.of("sz"), test);

            final OutputStream out_out = HfdsDataWriteService.wrapOutputInCodec(test, out_in);
            assertTrue("Stream is snappy: " + out_out.getClass().getSimpleName(),
                    out_out instanceof org.xerial.snappy.SnappyOutputStream);
        }
        //snappy
        {
            final DataBucketBean test_bucket = BeanTemplateUtils.build(DataBucketBean.class)
                    .with(DataBucketBean::full_name, "/test/static")
                    .with(DataBucketBean::data_schema,
                            BeanTemplateUtils.build(DataSchemaBean.class).with(DataSchemaBean::storage_schema,
                                    BeanTemplateUtils.build(StorageSchemaBean.class).with(StorageSchemaBean::json,
                                            BeanTemplateUtils.build(StorageSchemaBean.StorageSubSchemaBean.class)
                                                    .with(StorageSchemaBean.StorageSubSchemaBean::codec, "snappy")
                                                    .done().get())
                                            .done().get())
                                    .done().get())
                    .done().get();

            OutputStream out_in = new ByteArrayOutputStream();

            Optional<String> test = HfdsDataWriteService.getCanonicalCodec(
                    test_bucket.data_schema().storage_schema(), IStorageService.StorageStage.json);
            assertEquals(Optional.of("sz"), test);

            final OutputStream out_out = HfdsDataWriteService.wrapOutputInCodec(test, out_in);
            assertTrue("Stream is snappy: " + out_out.getClass().getSimpleName(),
                    out_out instanceof org.xerial.snappy.SnappyOutputStream);
        }

    }

    public static class TestBean {
        public TestBean(String a, String b) {
            _id = a;
            value = b;
        }

        public String _id;
        public String value;
    }

    protected HfdsDataWriteService<TestBean> getWriter(String name) {
        return getWriter(name, Optional.empty(), false);
    }

    protected HfdsDataWriteService<TestBean> getWriter(String name, Optional<String> secondary,
            boolean is_transient) {

        final String temp_dir = System.getProperty("java.io.tmpdir") + File.separator;

        GlobalPropertiesBean globals = BeanTemplateUtils.build(GlobalPropertiesBean.class)
                .with(GlobalPropertiesBean::distributed_root_dir, temp_dir)
                .with(GlobalPropertiesBean::local_yarn_config_dir, System.getenv("HADOOP_CONF_DIR")).done().get();

        MockHdfsStorageService storage_service = new MockHdfsStorageService(globals);

        final DataBucketBean test_bucket = BeanTemplateUtils.build(DataBucketBean.class)
                .with(DataBucketBean::full_name, name)
                .with(DataBucketBean::data_schema,
                        BeanTemplateUtils.build(DataSchemaBean.class).with(DataSchemaBean::storage_schema,
                                BeanTemplateUtils.build(StorageSchemaBean.class).with(StorageSchemaBean::processed,
                                        BeanTemplateUtils.build(StorageSchemaBean.StorageSubSchemaBean.class)
                                                //(no compression)
                                                //.with(StorageSchemaBean.StorageSubSchemaBean::codec, "snappy")
                                                .done().get())
                                        .done().get())
                                .done().get())
                .done().get();

        HfdsDataWriteService<TestBean> write_service = new HfdsDataWriteService<TestBean>(test_bucket,
                storage_service._data_service.get(),
                is_transient ? IStorageService.StorageStage.transient_output
                        : IStorageService.StorageStage.processed,
                is_transient ? Optional.of("testj-testm") : Optional.empty(), storage_service, secondary);

        return write_service;
    }

    @Test
    public void test_writerService_basics() {

        HfdsDataWriteService<TestBean> write_service = getWriter("/test/writer/basics");

        // First off a bunch of top level trivial calls
        {
            try {
                write_service.getCrudService();
                fail("Should have errored on getCrudService");
            } catch (Exception e) {
                // expected: getCrudService() is not supported by the write service
            }

            CompletableFuture<Long> cf = write_service.countObjects();
            try {
                cf.get();
                fail("Should have errored on getCrudService");
            } catch (Exception e) {
            }

            HfdsDataWriteService<JsonNode> write_service_json = (HfdsDataWriteService<JsonNode>) write_service
                    .getRawService();
            assertEquals(write_service_json._bucket, write_service._bucket);

            assertEquals(Optional.empty(),
                    write_service.getUnderlyingPlatformDriver(String.class, Optional.empty()));
        }

        // Check the batch service isn't loaded
        assertTrue("Writer not set", !write_service._writer.isSet());
    }

    @Test
    public void test_writerService_worker() throws Exception {
        final String temp_dir = System.getProperty("java.io.tmpdir") + File.separator;

        HfdsDataWriteService<TestBean> write_service = getWriter("/test/writer/worker");

        //(Tidy up)
        try {
            FileUtils.deleteDirectory(new File(temp_dir + "/data/" + write_service._bucket.full_name()));
        } catch (Exception e) {
            // (ignore: directory may not exist yet)
        }

        HfdsDataWriteService<TestBean>.WriterWorker worker = write_service.new WriterWorker();

        // (no codec suffix in the filename, since this is called before any codec is set up)
        assertEquals(HfdsDataWriteService._process_id + "_" + worker._thread_id + "_1.json", worker.getFilename());
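
        // Segment lifecycle exercised below: new_segment() opens
        // <process_id>_<thread_id>_<n>.json under .../current/.spooldir/, and
        // complete_segment() promotes a non-empty segment to .../current/all_time/
        // (an empty segment is left where it is).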

        // Check complete segment does nothing if no data has been written
        {
            worker.new_segment();

            File f = new File((temp_dir + "/data/" + write_service._bucket.full_name()
                    + "/managed_bucket/import/stored/processed/current/.spooldir/" + worker.getFilename())
                            .replace("/", File.separator));
            assertTrue("File should exist: " + f, f.exists());
            assertTrue("Expected segment: ", f.toString().endsWith("_1.json"));

            worker.complete_segment();

            assertTrue("File should not have moved: " + f, f.exists());

            File f2 = new File((temp_dir + "/data/" + write_service._bucket.full_name()
                    + "/managed_bucket/import/stored/processed/current/all_time/" + worker.getFilename())
                            .replace("/", File.separator));
            assertTrue("File should not exist: " + f2, !f2.exists());
        }
        // Check writes + non-empty segment
        {
            TestBean t1 = new TestBean("t1", "v1");
            TestBean t2 = new TestBean("t2", "v2");

            worker.new_segment();

            File f = new File((temp_dir + "/data/" + write_service._bucket.full_name()
                    + "/managed_bucket/import/stored/processed/current/.spooldir/" + worker.getFilename())
                            .replace("/", File.separator));
            assertTrue("File should exist: " + f, f.exists());
            assertTrue("Expected segment: ", f.toString().endsWith("_1.json"));

            // Write some objects out:

            worker.write("TEST1");
            worker.write("TEST2\n");
            worker.write(t1);
            worker.write(BeanTemplateUtils.toJson(t2));

            worker.complete_segment();

            assertTrue("File should have moved: " + f, !f.exists());

            File f2 = new File((temp_dir + "/data/" + write_service._bucket.full_name()
                    + "/managed_bucket/import/stored/processed/current/all_time/" + f.getName()).replace("/",
                            File.separator));
            assertTrue("File should exist: " + f2, f2.exists());
            assertTrue("Expected segment: ", f2.toString().endsWith("_1.json"));

            assertEquals("TEST1\nTEST2\n{\"_id\":\"t1\",\"value\":\"v1\"}\n{\"_id\":\"t2\",\"value\":\"v2\"}\n",
                    FileUtils.readFileToString(f2));
        }
        // Check basic write + second segment (list)
        {
            TestBean t1 = new TestBean("t1b", "v1b");
            TestBean t2 = new TestBean("t2b", "v2b");

            worker.new_segment();

            File f = new File((temp_dir + "/data/" + write_service._bucket.full_name()
                    + "/managed_bucket/import/stored/processed/current/.spooldir/" + worker.getFilename())
                            .replace("/", File.separator));
            assertTrue("File should exist: " + f, f.exists());
            assertTrue("Expected segment: ", f.toString().endsWith("_2.json"));

            // Write some object out:

            worker.write(Arrays.asList("TEST1b", "TEST2b\n", t1, BeanTemplateUtils.toJson(t2)));

            worker.complete_segment();

            assertTrue("File should have moved: " + f, !f.exists());

            File f2 = new File((temp_dir + "/data/" + write_service._bucket.full_name()
                    + "/managed_bucket/import/stored/processed/current/all_time/" + f.getName()).replace("/",
                            File.separator));
            assertTrue("File should exist: " + f2, f2.exists());
            assertTrue("Expected segment: ", f2.toString().endsWith("_2.json"));

            assertEquals(
                    "TEST1b\nTEST2b\n{\"_id\":\"t1b\",\"value\":\"v1b\"}\n{\"_id\":\"t2b\",\"value\":\"v2b\"}\n",
                    FileUtils.readFileToString(f2));
        }
    }

    @Test
    public void test_writerService_segmentationCriteria() throws Exception {
        final String temp_dir = System.getProperty("java.io.tmpdir") + File.separator;

        HfdsDataWriteService<TestBean> write_service = getWriter("/test/writer/segmentation");

        //(Tidy up)
        try {
            FileUtils.deleteDirectory(new File(temp_dir + "/data/" + write_service._bucket.full_name()));
        } catch (Exception e) {
            // (ignore: directory may not exist yet)
        }

        HfdsDataWriteService<TestBean>.WriterWorker worker = write_service.new WriterWorker();

        worker.new_segment();
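
        // check_segment(max_objects, max_size_b, max_time_ms) asks for a new segment
        // once the object count, byte count, or elapsed time passes its threshold
        // (parameter roles inferred from the assertions below).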

        assertTrue("No new segment", !worker.check_segment(100, 100, 1000));

        worker._state.curr_objects = 101;
        assertTrue("New segment on num", worker.check_segment(100, 100, 1000));
        assertTrue("New segment on num b", !worker.check_segment(102, 100, 1000));

        worker._state.curr_size_b = 101;
        assertTrue("New segment on size", worker.check_segment(102, 100, 1000));
        assertTrue("New segment on size b", !worker.check_segment(102, 102, 1000));

        Thread.sleep(100);
        assertTrue("New segment on time", worker.check_segment(102, 102, 50));

        worker._state.last_segmented = System.currentTimeMillis() + 1000;
        assertTrue("New segment on time b", worker.check_segment(102, 102, 100000L));
    }

    @Test
    public void test_writerService_end2end_primary() throws InterruptedException, ExecutionException {
        test_writerService_end2end(Optional.empty(), false);
    }

    @Test
    public void test_writerService_end2end_secondary() throws InterruptedException, ExecutionException {
        test_writerService_end2end(Optional.of("secondary_test"), false);
    }

    @Test
    public void test_writerService_end2end_transient() throws InterruptedException, ExecutionException {
        test_writerService_end2end(Optional.of("ping"), true);
    }

    public void test_writerService_end2end(Optional<String> secondary, boolean is_transient)
            throws InterruptedException, ExecutionException {
        final String temp_dir = System.getProperty("java.io.tmpdir") + File.separator;
        HfdsDataWriteService<TestBean> write_service = getWriter(
                "/test/writer/end2end/" + secondary.orElse("current") + "/", secondary, is_transient);

        //(Tidy up)
        try {
            FileUtils.deleteDirectory(new File(temp_dir + "/data/" + write_service._bucket.full_name()));
        } catch (Exception e) {
            // (ignore: directory may not exist yet)
        }

        // Check lazy initialization only kicks in once      
        Optional<IBatchSubservice<TestBean>> x = write_service.getBatchWriteSubservice();
        assertEquals(x.get(), write_service._writer.get());
        Optional<IBatchSubservice<TestBean>> y = write_service.getBatchWriteSubservice();
        assertEquals(x.get(), y.get());

        IBatchSubservice<TestBean> batch = x.get();

        // Set up properties for testing:
        batch.setBatchProperties(Optional.of(1000), Optional.of(1000L), Optional.of(Duration.ofSeconds(2L)),
                Optional.of(3));
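        // (Parameter roles, judging by the assertions that follow: max objects per
        //  segment, max segment size in bytes, max flush interval, worker threads.)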

        Thread.sleep(1000L);
        // Check there are now 3 threads
        assertEquals(3, write_service._writer.get()._state._workers.getActiveCount());

        for (int i = 0; i < 20; ++i) {
            TestBean emit = new TestBean("id" + i, "val" + i);
            if (0 == (i % 2)) {
                if (0 == ((i / 2) % 2)) {
                    batch.storeObject(emit);
                } else {
                    CompletableFuture<Supplier<Object>> cf = write_service.storeObject(emit);
                    assertEquals(null, cf.get().get());
                }
            } else {
                if (0 == ((i / 2) % 2)) {
                    batch.storeObjects(Arrays.asList(emit));
                } else {
                    CompletableFuture<Tuple2<Supplier<List<Object>>, Supplier<Long>>> cf = write_service
                            .storeObjects(Arrays.asList(emit));
                    assertEquals(Collections.emptyList(), cf.get()._1().get());
                    assertEquals(1L, cf.get()._2().get().longValue());
                }
            }
        }
        final String infix = is_transient ? IStorageService.TRANSIENT_DATA_SUFFIX_SECONDARY
                : IStorageService.STORED_DATA_SUFFIX_PROCESSED_SECONDARY;
        final String infix_name = is_transient ? "testj-testm" : "";

        // Check that initially the files are stored locally
        File init_dir = new File((temp_dir + "/data/" + write_service._bucket.full_name() + infix
                + secondary.orElse("current") + "/" + infix_name + "/.spooldir/").replace("/", File.separator));
        File final_dir = new File((temp_dir + "/data/" + write_service._bucket.full_name() + infix
                + secondary.orElse("current") + "/" + infix_name + "/all_time/").replace("/", File.separator));

        {
            int ii = 1;
            for (; ii <= 50; ++ii) {
                Thread.sleep(250L);
                if (6 == init_dir.list().length) {
                    break;
                }
            }
            System.out.println("(exited from file system check after " + ii * 2.5 + " s)");
        }

        assertEquals("Needs to have 6 files, including 3x .crc: " + Arrays.toString(init_dir.list()), 6,
                init_dir.list().length); //*2 because CRC
        assertTrue(
                "Nothing in final dir: " + (final_dir.exists() ? Arrays.toString(final_dir.list()) : "(non-exist)"),
                !final_dir.exists() || final_dir.list().length == 0);

        {
            int ii = 1;
            for (; ii <= 50; ++ii) {
                Thread.sleep(2500L);
                if (0 == init_dir.list().length) {
                    break;
                }
            }
            System.out.println("(exited from file system check after " + ii * 2.5 + " s)");
        }

        assertEquals(0, init_dir.list().length);
        assertEquals(6, final_dir.list().length); // x2 because each data file has a .crc checksum file

        // Change batch properties so that will segment (also check number of threads reduces)
        batch.setBatchProperties(Optional.of(10), Optional.of(1000L), Optional.of(Duration.ofSeconds(5L)),
                Optional.of(1));
        List<TestBean> l1 = IntStream.range(0, 8).boxed().map(i -> new TestBean("id" + i, "val" + i))
                .collect(Collectors.toList());
        List<TestBean> l2 = IntStream.range(8, 15).boxed().map(i -> new TestBean("id" + i, "val" + i))
                .collect(Collectors.toList());

        batch.storeObjects(l1);
        Thread.sleep(750L);
        assertEquals(6, final_dir.list().length); //*2 because CRC      
        System.out.println("Found: 6 files: " + Arrays.stream(final_dir.list()).collect(Collectors.joining(";")));

        batch.storeObjects(l2);
        System.out.println("Added 7 more objects at " + new Date());
        for (int jj = 0; jj < 5; ++jj) {
            Thread.sleep(1500L);
            if (final_dir.list().length > 6)
                break;
        }
        System.out.println("(Check init dir cleared: "
                + Arrays.stream(init_dir.list()).collect(Collectors.joining(";")) + ")");
        assertEquals("Should have 8 files: " + Arrays.stream(final_dir.list()).collect(Collectors.joining(";")), 8,
                final_dir.list().length); //*2 because CRC   

        System.out.println("(Deleting datastore and checking it's empty)");
        assertTrue("Deleted datastore: ", write_service.deleteDatastore().get()); // (just quick test since this uses handleBucketDeletion which is tested elsewhere...)
        String[] final_dir_list = Optional.ofNullable(final_dir.list()).orElse(new String[0]);
        assertEquals("Should have 0 files: " + Arrays.stream(final_dir_list).collect(Collectors.joining(";")), 0,
                final_dir_list.length); //*2 because CRC   
    }
}