com.ikanow.aleph2.analytics.services.TestAnalyticsContext_FileSystemChecks.java Source code

Introduction

Here is the source code for com.ikanow.aleph2.analytics.services.TestAnalyticsContext_FileSystemChecks.java, a JUnit 4 test class that exercises the file-system behaviour of the Aleph2 AnalyticsContext: resolving timed storage-service input paths and emitting objects into other buckets' file areas.

Source

/*******************************************************************************
 * Copyright 2015, The IKANOW Open Source Project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/
package com.ikanow.aleph2.analytics.services;

import static org.junit.Assert.*;

import java.io.File;
import java.io.IOException;
import java.time.LocalDate;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors;

import org.apache.commons.io.FileUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.junit.Before;
import org.junit.Test;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.inject.Inject;
import com.google.inject.Injector;
import com.ikanow.aleph2.analytics.utils.ErrorUtils;
import com.ikanow.aleph2.data_model.interfaces.data_services.IManagementDbService;
import com.ikanow.aleph2.data_model.interfaces.data_services.IStorageService;
import com.ikanow.aleph2.data_model.interfaces.shared_services.ISecurityService;
import com.ikanow.aleph2.data_model.interfaces.shared_services.IServiceContext;
import com.ikanow.aleph2.data_model.interfaces.shared_services.MockSecurityService;
import com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadBean;
import com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean;
import com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean.AnalyticThreadJobInputBean;
import com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean;
import com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean.MasterEnrichmentType;
import com.ikanow.aleph2.data_model.objects.shared.BasicMessageBean;
import com.ikanow.aleph2.data_model.utils.BeanTemplateUtils;
import com.ikanow.aleph2.data_model.utils.ModuleUtils;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import com.typesafe.config.ConfigValueFactory;

import fj.data.Either;
import fj.data.Validation;

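/**
 * JUnit 4 tests for the file-system-facing parts of {@code AnalyticsContext}:
 * resolving timed storage-service input paths, and "external emit" of objects
 * into another bucket's temp/to-import directories.
 */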
public class TestAnalyticsContext_FileSystemChecks {

    static final Logger _logger = LogManager.getLogger();

    protected ObjectMapper _mapper = BeanTemplateUtils.configureMapper(Optional.empty());
    protected Injector _app_injector;

    @Inject
    protected IServiceContext _service_context;

    @Before
    public void injectModules() throws Exception {
        _logger.info("run injectModules");

        final String temp_dir = System.getProperty("java.io.tmpdir") + File.separator;

        final Config config = ConfigFactory
                .parseFile(new File("./example_config_files/context_local_test.properties"))
                .withValue("globals.local_root_dir", ConfigValueFactory.fromAnyRef(temp_dir))
                .withValue("globals.local_cached_jar_dir", ConfigValueFactory.fromAnyRef(temp_dir))
                .withValue("globals.distributed_root_dir", ConfigValueFactory.fromAnyRef(temp_dir))
                .withValue("globals.local_yarn_config_dir", ConfigValueFactory.fromAnyRef(temp_dir));

        try {
            _app_injector = ModuleUtils.createTestInjector(Arrays.asList(), Optional.of(config));
            _app_injector.injectMembers(this);
        } catch (Exception e) {
            // surface the full error and fail fast - a silently null injector would
            // otherwise cause confusing NPEs inside the tests themselves
            System.out.println(ErrorUtils.getLongForm("{0}", e));
            throw e;
        }
    }

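    /**
     * Checks that getInputPaths falls back to the whole "current" tree when no
     * timed sub-directories exist, selects only the slices inside time_min once
     * they do exist, and rejects inputs that enable the high granularity filter.
     */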
    @Test
    public void test_storageService_timedInputPaths() throws InterruptedException, ExecutionException {

        final AnalyticsContext test_context = _app_injector.getInstance(AnalyticsContext.class);

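        // Start from a clean slate - delete any timed directories left over from a previous run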
        File f = new File(_service_context.getStorageService().getBucketRootPath() + "/this_bucket"
                + IStorageService.STORED_DATA_SUFFIX_PROCESSED);
        FileUtils.deleteQuietly(f);

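        // Input that reads this bucket's own processed storage, restricted to the last year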
        final AnalyticThreadJobBean.AnalyticThreadJobInputBean analytic_input1 = BeanTemplateUtils
                .build(AnalyticThreadJobBean.AnalyticThreadJobInputBean.class)
                .with(AnalyticThreadJobBean.AnalyticThreadJobInputBean::data_service, "storage_service")
                .with(AnalyticThreadJobBean.AnalyticThreadJobInputBean::resource_name_or_id, "/this_bucket") //(just avoids DB check)
                .with(AnalyticThreadJobBean.AnalyticThreadJobInputBean::config,
                        BeanTemplateUtils.build(AnalyticThreadJobBean.AnalyticThreadJobInputConfigBean.class)
                                .with(AnalyticThreadJobBean.AnalyticThreadJobInputConfigBean::time_min, "1 year")
                                .done().get())
                .done().get();

        final AnalyticThreadJobBean analytic_job1 = BeanTemplateUtils.build(AnalyticThreadJobBean.class)
                .with(AnalyticThreadJobBean::name, "test_name1")
                .with(AnalyticThreadJobBean::analytic_technology_name_or_id, "test_analytic_tech_id")
                .with(AnalyticThreadJobBean::inputs, Arrays.asList(analytic_input1))
                .with(AnalyticThreadJobBean::library_names_or_ids, Arrays.asList("id1", "name2")).done().get();

        final DataBucketBean test_bucket = BeanTemplateUtils.build(DataBucketBean.class)
                .with(DataBucketBean::_id, "this_bucket").with(DataBucketBean::full_name, "/this_bucket")
                .with(DataBucketBean::analytic_thread,
                        BeanTemplateUtils.build(AnalyticThreadBean.class)
                                .with(AnalyticThreadBean::jobs, Arrays.asList(analytic_job1)).done().get())
                .done().get();

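        // Store the bucket so that getInputPaths can resolve "/this_bucket" via the (mock) management DB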
        test_context._service_context.getService(IManagementDbService.class, Optional.empty()).get()
                .getDataBucketStore().storeObject(test_bucket).get();

        // Check falls back to current if dirs don't exist

        assertTrue("Falls back to full storage",
                test_context.getInputPaths(Optional.of(test_bucket), analytic_job1, analytic_input1).get(0)
                        .endsWith("/this_bucket/managed_bucket/import/stored/processed/current/**/*"));

        // Create timed directories for the last four years - with time_min of "1 year",
        // only the previous and current year's slices should be selected
        final int year = LocalDate.now().getYear();

        createDirs(f,
                Arrays.asList("test_" + (year - 3), "test_" + (year - 2), "test_" + (year - 1), "test_" + (year)));

        final List<String> res = test_context.getInputPaths(Optional.of(test_bucket), analytic_job1,
                analytic_input1);

        assertEquals("Timed slices: " + res.stream().collect(Collectors.joining(";")),
                Arrays.asList("/current/test_" + (year - 1) + "/*", "/current/test_" + year + "/*"),
                res.stream().map(s -> s.substring(s.indexOf("/current/"))).sorted().collect(Collectors.toList()));

        // Check high granularity mode is disabled:

        try {
            final AnalyticThreadJobBean.AnalyticThreadJobInputBean analytic_input_fail = BeanTemplateUtils
                    .clone(analytic_input1)
                    .with(AnalyticThreadJobBean.AnalyticThreadJobInputBean::config, BeanTemplateUtils
                            .build(AnalyticThreadJobBean.AnalyticThreadJobInputConfigBean.class)
                            .with(AnalyticThreadJobBean.AnalyticThreadJobInputConfigBean::high_granularity_filter,
                                    true)
                            .done().get())
                    .done();

            final AnalyticThreadJobBean analytic_job_fail = BeanTemplateUtils.clone(analytic_job1)
                    .with(AnalyticThreadJobBean::inputs, Arrays.asList(analytic_input_fail)).done();

            final DataBucketBean test_bucket_fail = BeanTemplateUtils.clone(test_bucket)
                    .with(DataBucketBean::analytic_thread,
                            BeanTemplateUtils.build(AnalyticThreadBean.class)
                                    .with(AnalyticThreadBean::jobs, Arrays.asList(analytic_job_fail)).done().get())
                    .done();

            test_context.getInputPaths(Optional.of(test_bucket_fail), analytic_job_fail, analytic_input_fail);
            fail("Should have thrown error");
        } catch (Exception e) {
            // expected - high granularity mode is disabled for storage service inputs
        }
    }

    @Test
    public void test_externalEmit() throws JsonProcessingException, IOException, InterruptedException {
        test_externalEmit_worker(false);
    }

    @Test
    public void test_externalEmit_testMode() throws JsonProcessingException, IOException, InterruptedException {
        test_externalEmit_worker(true);
    }

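    /**
     * Shared worker: emits an object from "my" bucket into a separate batch
     * analytic bucket's file area. In normal mode the object sits in the target's
     * temp directory until flushBatchOutput moves it into the to-import
     * directory; in test mode nothing is written to either directory.
     */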
    public void test_externalEmit_worker(boolean is_test)
            throws JsonProcessingException, IOException, InterruptedException {

        final MockSecurityService mock_security = (MockSecurityService) _service_context.getSecurityService();

        // Create some buckets:

        // 1) My bucket

        final AnalyticsContext test_context = _app_injector.getInstance(AnalyticsContext.class);

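        // The batch analytic job (and its input) run by the bucket we will emit into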
        final AnalyticThreadJobInputBean input = BeanTemplateUtils.build(AnalyticThreadJobInputBean.class)
                .with(AnalyticThreadJobInputBean::resource_name_or_id, "/test/analytics/batch").done().get();

        final AnalyticThreadJobBean job = BeanTemplateUtils.build(AnalyticThreadJobBean.class)
                .with(AnalyticThreadJobBean::name, "test")
                .with(AnalyticThreadJobBean::analytic_type, MasterEnrichmentType.batch)
                .with(AnalyticThreadJobBean::inputs, Arrays.asList(input)).done().get();

        final DataBucketBean my_bucket = BeanTemplateUtils.build(DataBucketBean.class)
                .with(DataBucketBean::full_name, is_test ? "/aleph2_testing/useriid/test/me" : "/test/me")
                .with(DataBucketBean::owner_id, "me")
                .with(DataBucketBean::external_emit_paths, Arrays.asList("/test/analytics/*")).done().get();

        test_context.setBucket(my_bucket);

        // 2) Batch analytic bucket

        final DataBucketBean analytic_bucket_batch = BeanTemplateUtils.build(DataBucketBean.class)
                .with(DataBucketBean::full_name, "/test/analytics/batch")
                .with(DataBucketBean::analytic_thread, BeanTemplateUtils.build(AnalyticThreadBean.class)
                        .with(AnalyticThreadBean::jobs, Arrays.asList(job)).done().get())
                .done().get();
        test_context._service_context.getService(IManagementDbService.class, Optional.empty()).get()
                .getDataBucketStore().storeObject(analytic_bucket_batch, true).join();
        mock_security.setUserMockRole("me", analytic_bucket_batch.full_name(), ISecurityService.ACTION_READ_WRITE,
                true);

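        // Reset the target bucket's temp and to-import directories so the file counts below start from zero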
        File f_tmp = new File(_service_context.getStorageService().getBucketRootPath()
                + analytic_bucket_batch.full_name() + IStorageService.TEMP_DATA_SUFFIX);
        File f_import = new File(_service_context.getStorageService().getBucketRootPath()
                + analytic_bucket_batch.full_name() + IStorageService.TO_IMPORT_DATA_SUFFIX);
        FileUtils.deleteQuietly(f_tmp);
        FileUtils.deleteQuietly(f_import);
        createDirs(f_tmp, Arrays.asList(""));
        createDirs(f_import, Arrays.asList(""));
        assertTrue("Should exist:" + f_tmp, f_tmp.exists());
        assertTrue("Should exist:" + f_import, f_import.exists());

        // emit the objects

        final Validation<BasicMessageBean, JsonNode> ret_val_1 = test_context.emitObject(
                Optional.of(analytic_bucket_batch), job,
                Either.left((ObjectNode) _mapper.readTree("{\"test\":\"batch_succeed\"}")), Optional.empty());

        assertTrue("Should work: " + ret_val_1.validation(f -> f.message(), s -> s.toString()),
                ret_val_1.isSuccess());
        assertTrue(test_context._mutable_state.external_buckets.get(analytic_bucket_batch.full_name()).isLeft());

        // no files to start with (because the output hasn't been flushed)
        Thread.sleep(500L); //(safety)

        assertEquals(0, f_import.list().length);
        if (is_test)
            assertEquals(0, f_tmp.list().length);
        else
            assertNotEquals(0, f_tmp.list().length);

        test_context.flushBatchOutput(Optional.of(my_bucket), job);

        // now check again (after a "safety sleep")
        Thread.sleep(500L);

        assertEquals(0, f_tmp.list().length);
        if (is_test)
            assertEquals(0, f_import.list().length);
        else
            assertNotEquals(0, f_import.list().length);
    }

    //////////////////////////////////////////////////////////////

    //UTILS

    /** Recreates each of the named sub-directories of f, quietly ignoring any IO failures. */
    public void createDirs(File f, List<String> dirs) {
        dirs.forEach(dir -> {
            try {
                final File dir_obj = new File(f.toString() + "/" + dir);
                FileUtils.deleteDirectory(dir_obj);
                FileUtils.forceMkdir(dir_obj);
            } catch (Exception e) {
                // quietly ignored - the assertions on the directories will catch any failure
            }
        });
    }

}