com.moz.fiji.mapreduce.IntegrationTestJobHistoryFijiTable.java Source code

Java tutorial

Introduction

Here is the source code for com.moz.fiji.mapreduce.IntegrationTestJobHistoryFijiTable.java

Source

/**
 * (c) Copyright 2013 WibiData, Inc.
 *
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.moz.fiji.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.Date;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.moz.fiji.mapreduce.avro.generated.JobHistoryEntry;
import com.moz.fiji.mapreduce.framework.JobHistoryFijiTable;
import com.moz.fiji.mapreduce.framework.FijiConfKeys;
import com.moz.fiji.mapreduce.output.MapReduceJobOutputs;
import com.moz.fiji.mapreduce.produce.FijiProduceJobBuilder;
import com.moz.fiji.mapreduce.produce.FijiProducer;
import com.moz.fiji.mapreduce.produce.ProducerContext;
import com.moz.fiji.mapreduce.tools.FijiJobHistory;
import com.moz.fiji.schema.Fiji;
import com.moz.fiji.schema.FijiDataRequest;
import com.moz.fiji.schema.FijiRowData;
import com.moz.fiji.schema.FijiTable;
import com.moz.fiji.schema.FijiURI;
import com.moz.fiji.schema.layout.FijiTableLayout;
import com.moz.fiji.schema.testutil.AbstractFijiIntegrationTest;

/**
 * Integration test for the job history table.
 */
public class IntegrationTestJobHistoryFijiTable extends AbstractFijiIntegrationTest {
    private static final Logger LOG = LoggerFactory.getLogger(IntegrationTestJobHistoryFijiTable.class);

    /**
     * Test that makes sure the job history table is installed correctly and can be opened.
     */
    @Test
    public void testInstallAndOpen() throws Exception {
        Fiji fiji = Fiji.Factory.open(getFijiURI());
        try {
            // This will throw an IOException if there's difficulty opening the table
            final JobHistoryFijiTable jobHistory = JobHistoryFijiTable.open(fiji);
            jobHistory.close();
        } finally {
            fiji.release();
        }
    }

    /**
     * This test sets up an older version of the job history table and ensures that
     * it gets updated upon install.
     *
     * @throws Exception Upon failure to install or upgrade the job history table.
     */
    @Test
    public void testUpgradeJobHistoryTable() throws Exception {
        final String tableName = "job_history";

        // old table layout.
        final String tableLayoutResource = "/com.moz.fiji/mapreduce/job-history-layout.json";

        // all job history tables have at least this version.
        final String prevTableLayoutVersion = "1";

        // latest job history layout version
        final String jhTableLayoutVersion = "2";

        Fiji fiji = Fiji.Factory.open(getFijiURI());
        try {
            // If the job history table exists, delete it.
            if (fiji.getTableNames().contains(tableName)) {
                fiji.deleteTable(tableName);
            }
            // Create a job history table with the older layout
            fiji.createTable(FijiTableLayout.createFromEffectiveJsonResource(tableLayoutResource).getDesc());
            assertEquals(fiji.getMetaTable().getTableLayout(tableName).getDesc().getLayoutId(),
                    prevTableLayoutVersion);

            // Now install job history table
            final JobHistoryFijiTable jobHistory = JobHistoryFijiTable.open(fiji);
            try {
                assertEquals(fiji.getMetaTable().getTableLayout(tableName).getDesc().getLayoutId(),
                        jhTableLayoutVersion);
            } finally {
                jobHistory.close();
            }
        } finally {
            fiji.release();
        }
    }

    /** A private inner producer to test job recording. */
    public static class EmailDomainProducer extends FijiProducer {
        /** {@inheritDoc} */
        @Override
        public FijiDataRequest getDataRequest() {
            // We only need to read the most recent email address field from the user's row.
            return FijiDataRequest.create("info", "email");
        }

        /** {@inheritDoc} */
        @Override
        public String getOutputColumn() {
            return "derived:domain";
        }

        /** {@inheritDoc} */
        @Override
        public void produce(FijiRowData input, ProducerContext context) throws IOException {
            if (!input.containsColumn("info", "email")) {
                // This user doesn't have an email address.
                return;
            }
            String email = input.getMostRecentValue("info", "email").toString();
            int atSymbol = email.indexOf("@");
            if (atSymbol < 0) {
                // Couldn't find the '@' in the email address. Give up.
                return;
            }
            String domain = email.substring(atSymbol + 1);
            context.put(domain);
        }
    }

    /** A private inner producer to test job recording of failed jobs. */
    public static class BrokenEmailDomainProducer extends FijiProducer {
        /** {@inheritDoc} */
        @Override
        public FijiDataRequest getDataRequest() {
            // We only need to read the most recent email address field from the user's row.
            return FijiDataRequest.create("info", "email");
        }

        /** {@inheritDoc} */
        @Override
        public String getOutputColumn() {
            return "derived:domain";
        }

        /** {@inheritDoc} */
        @Override
        public void produce(FijiRowData input, ProducerContext context) throws IOException {
            throw new RuntimeException("This producer always fails.");
        }
    }

    /**
     * Test of all the basic information recorded by a mapper.
     */
    @Test
    public void testMappers() throws Exception {
        createAndPopulateFooTable();
        final Configuration jobConf = getConf();
        // Set a value in the configuration. We'll check to be sure we can retrieve it later.
        jobConf.set("conf.test.animal.string", "squirrel");
        final Fiji fiji = Fiji.Factory.open(getFijiURI());
        try {
            final FijiURI fooTableURI = FijiURI.newBuilder(getFijiURI()).withTableName("foo").build();
            final JobHistoryFijiTable jobHistory = JobHistoryFijiTable.open(fiji);

            try {
                // Construct a Producer for this table.
                final FijiProduceJobBuilder builder = FijiProduceJobBuilder.create().withConf(jobConf)
                        .withInputTable(fooTableURI).withProducer(EmailDomainProducer.class)
                        .withOutput(MapReduceJobOutputs.newDirectFijiTableMapReduceJobOutput(fooTableURI));
                FijiMapReduceJob mrJob = builder.build();

                // Record the jobId and run the job.
                String jobName = mrJob.getHadoopJob().getJobName();
                LOG.info("About to run job: " + jobName);
                assertTrue(mrJob.run());
                String jobId = mrJob.getHadoopJob().getJobID().toString();
                LOG.info("Job was run with id: " + jobId);

                // Retrieve the recorded values and sanity test them.
                JobHistoryEntry jobEntry = jobHistory.getJobDetails(jobId);
                assertEquals(jobEntry.getJobName(), jobName);
                assertEquals(jobEntry.getJobId(), jobId);
                assertTrue(jobEntry.getJobStartTime() < jobEntry.getJobEndTime());
                assertEquals("SUCCEEDED", jobEntry.getJobEndStatus());

                // Check counters. We don't know the exact number of rows in the foo table, so just check if
                // it's greater than 0.
                final String countersString = jobEntry.getJobCounters();
                final Pattern countersPattern = Pattern.compile("PRODUCER_ROWS_PROCESSED=(\\d+)");
                final Matcher countersMatcher = countersPattern.matcher(countersString);
                assertTrue(countersMatcher.find());
                assertTrue(Integer.parseInt(countersMatcher.group(1)) > 0);

                // Test to make sure the Configuration has the correct producer class, and records the value
                // we set previously.
                final String configString = jobEntry.getJobConfiguration();
                final Configuration config = new Configuration();
                config.addResource(new ByteArrayInputStream(configString.getBytes()));
                assertTrue(EmailDomainProducer.class == config.getClass(FijiConfKeys.FIJI_PRODUCER_CLASS, null));
                assertEquals("Couldn't retrieve configuration field from deserialized configuration.", "squirrel",
                        config.get("conf.test.animal.string"));
            } finally {
                jobHistory.close();
            }
        } finally {
            fiji.release();
        }
    }

    /**
     * Test that runs a producer that always fails and checks to be sure that it's recorded in the
     * history table with a failure.
     */
    @Test
    public void testFailingJob() throws Exception {
        createAndPopulateFooTable();
        final Configuration jobConf = getConf();
        final Fiji fiji = Fiji.Factory.open(getFijiURI());
        try {
            final FijiURI fooTableURI = FijiURI.newBuilder(getFijiURI()).withTableName("foo").build();
            final JobHistoryFijiTable jobHistory = JobHistoryFijiTable.open(fiji);
            try {
                // Construct a Producer for this table.
                final FijiProduceJobBuilder builder = FijiProduceJobBuilder.create().withConf(jobConf)
                        .withInputTable(fooTableURI).withProducer(BrokenEmailDomainProducer.class)
                        .withOutput(MapReduceJobOutputs.newDirectFijiTableMapReduceJobOutput(fooTableURI));
                FijiMapReduceJob mrJob = builder.build();

                // Record the jobId and run the job. Make sure it completes and failed.
                String jobName = mrJob.getHadoopJob().getJobName();
                LOG.info("About to run failing job: " + jobName);
                assertFalse(mrJob.run());
                String jobId = mrJob.getHadoopJob().getJobID().toString();
                LOG.info("Job was run with id: " + jobId);
                assertTrue(mrJob.getHadoopJob().isComplete());
                assertFalse(mrJob.getHadoopJob().isSuccessful());

                // Retrieve the job status from the history table and make sure it failed.
                JobHistoryEntry jobEntry = jobHistory.getJobDetails(jobId);
                assertEquals("FAILED", jobEntry.getJobEndStatus());
            } finally {
                jobHistory.close();
            }
        } finally {
            fiji.release();
        }
    }

    /**
     * Test that makes sure information is recorded correctly for a job run with .submit() instead
     * of .run(). Only checks timing info.
     */
    @Test
    public void testSubmit() throws Exception {
        createAndPopulateFooTable();
        final Fiji fiji = Fiji.Factory.open(getFijiURI());
        try {
            final FijiURI fooTableURI = FijiURI.newBuilder(getFijiURI()).withTableName("foo").build();
            JobHistoryFijiTable jobHistory = JobHistoryFijiTable.open(fiji);

            try {
                // Construct a Producer for this table.
                FijiProduceJobBuilder builder = FijiProduceJobBuilder.create().withConf(getConf())
                        .withInputTable(fooTableURI).withProducer(EmailDomainProducer.class)
                        .withOutput(MapReduceJobOutputs.newDirectFijiTableMapReduceJobOutput(fooTableURI));
                FijiMapReduceJob mrJob = builder.build();

                LOG.info("About to submit job: " + mrJob.getHadoopJob().getJobName());
                FijiMapReduceJob.Status status = mrJob.submit();
                while (!status.isComplete()) {
                    Thread.sleep(1000L);
                }
                assertTrue(status.isSuccessful());
                String jobId = mrJob.getHadoopJob().getJobID().toString();
                LOG.info("Job successfully submitted and run. Id: " + jobId);

                // The job recording takes place in a separate thread, so sleep a bit to give it time to
                // write out.
                Thread.sleep(5000L);

                JobHistoryEntry jobEntry = jobHistory.getJobDetails(jobId);
                assertTrue(jobEntry.getJobStartTime() < jobEntry.getJobEndTime());
            } finally {
                jobHistory.close();
            }
        } finally {
            fiji.release();
        }
    }

    /**
     * Tests that a job will still run to completion even without an installed job history table.
     */
    @Test
    public void testMissingHistoryTableNonfatal() throws Exception {
        createAndPopulateFooTable();
        // Do not create a job history table.
        final Fiji fiji = Fiji.Factory.open(getFijiURI());
        try {
            final FijiTable fooTable = fiji.openTable("foo");
            try {
                final FijiProduceJobBuilder builder = FijiProduceJobBuilder.create().withConf(getConf())
                        .withInputTable(fooTable.getURI()).withProducer(EmailDomainProducer.class)
                        .withOutput(MapReduceJobOutputs.newDirectFijiTableMapReduceJobOutput(fooTable.getURI()));
                final FijiMapReduceJob mrJob = builder.build();
                assertTrue(mrJob.run());
            } finally {
                fooTable.release();
            }
        } finally {
            fiji.release();
        }
    }

    /**
     * Tests the output of the job-history tool.
     */
    @Test
    public void testJobHistoryTool() throws Exception {
        createAndPopulateFooTable();
        final Configuration jobConf = getConf();
        final Fiji fiji = Fiji.Factory.open(getFijiURI());
        try {
            final FijiURI fooTableURI = FijiURI.newBuilder(getFijiURI()).withTableName("foo").build();

            // Construct two producers for this table.
            final FijiProduceJobBuilder builderEmailDomain = FijiProduceJobBuilder.create().withConf(jobConf)
                    .withInputTable(fooTableURI).withProducer(EmailDomainProducer.class)
                    .withOutput(MapReduceJobOutputs.newDirectFijiTableMapReduceJobOutput(fooTableURI));
            FijiMapReduceJob mrJobOne = builderEmailDomain.build();
            FijiMapReduceJob mrJobTwo = builderEmailDomain.build();

            // Run the first produce job.
            String jobOneName = mrJobOne.getHadoopJob().getJobName();
            LOG.info("About to run job: " + jobOneName);
            assertTrue(mrJobOne.run());
            String jobOneId = mrJobOne.getHadoopJob().getJobID().toString();
            LOG.info("Job was run with id: " + jobOneId);

            // Get the StdOut from the job-history tool.
            String jobHistoryStdOut = runTool(new FijiJobHistory(), new String[] { "--fiji=" + fiji.getURI(),
                    "--job-id=" + jobOneId, "--verbose", "--counter-names", }).getStdout("Utf-8");

            JobHistoryFijiTable jobHistory = JobHistoryFijiTable.open(fiji);

            // Check if the StdOut contains the job history for the first job
            try {
                JobHistoryEntry jobOneEntry = jobHistory.getJobDetails(jobOneId);
                assertTrue(jobHistoryStdOut.contains(jobOneEntry.getJobName()));
                assertTrue(jobHistoryStdOut.contains(jobOneEntry.getJobId()));
                assertTrue(jobHistoryStdOut.contains(new Date(jobOneEntry.getJobStartTime()).toString()));
                assertTrue(jobHistoryStdOut.contains(new Date(jobOneEntry.getJobEndTime()).toString()));
                assertTrue(jobHistoryStdOut.contains(jobOneEntry.getJobEndStatus()));
                assertTrue(jobHistoryStdOut.contains(jobOneEntry.getJobCounters()));
                assertTrue(jobHistoryStdOut.contains(jobOneEntry.getJobConfiguration()));
                assertTrue(jobHistoryStdOut
                        .contains(Arrays.toString(jobOneEntry.getCountersFamily().keySet().toArray())));
                // check for a specific counter to guard against an empty array in the check above
                assertTrue(jobHistoryStdOut
                        .contains("com.moz.fiji.mapreduce.framework.JobHistoryCounters:PRODUCER_ROWS_PROCESSED"));

                // Run the second produce job.
                String jobTwoName = mrJobTwo.getHadoopJob().getJobName();
                LOG.info("About to run job: " + jobTwoName);
                assertTrue(mrJobTwo.run());
                String jobTwoId = mrJobTwo.getHadoopJob().getJobID().toString();
                LOG.info("Job was run with id: " + jobTwoId);

                // Get the StdOut from the job-history tool, again.
                jobHistoryStdOut = runTool(new FijiJobHistory(), new String[] { "--fiji=" + fiji.getURI() })
                        .getStdout("Utf-8");

                // Check if the StdOut contains the relevant job histories for each job.
                // Check the first produce job again.
                assertTrue(jobHistoryStdOut.contains(jobOneEntry.getJobName()));
                assertTrue(jobHistoryStdOut.contains(jobOneEntry.getJobId()));
                assertTrue(jobHistoryStdOut.contains(new Date(jobOneEntry.getJobStartTime()).toString()));
                assertTrue(jobHistoryStdOut.contains(new Date(jobOneEntry.getJobEndTime()).toString()));
                assertTrue(jobHistoryStdOut.contains(jobOneEntry.getJobEndStatus()));

                // Check the second produce job.
                JobHistoryEntry jobTwoEntry = jobHistory.getJobDetails(jobTwoId);
                assertTrue(jobHistoryStdOut.contains(jobTwoEntry.getJobName()));
                assertTrue(jobHistoryStdOut.contains(jobTwoEntry.getJobId()));
                assertTrue(jobHistoryStdOut.contains(new Date(jobTwoEntry.getJobStartTime()).toString()));
                assertTrue(jobHistoryStdOut.contains(new Date(jobTwoEntry.getJobEndTime()).toString()));
                assertTrue(jobHistoryStdOut.contains(jobTwoEntry.getJobEndStatus()));
            } finally {
                jobHistory.close();
            }
        } finally {
            fiji.release();
        }
    }
}