Java tutorial
/** * (c) Copyright 2013 WibiData, Inc. * * See the NOTICE file distributed with this work for additional * information regarding copyright ownership. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.kiji.mapreduce; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import java.io.ByteArrayInputStream; import java.io.IOException; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.hadoop.conf.Configuration; import org.junit.Before; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.kiji.mapreduce.output.DirectKijiTableMapReduceJobOutput; import org.kiji.schema.Kiji; import org.kiji.schema.KijiDataRequest; import org.kiji.schema.KijiRowData; import org.kiji.schema.KijiTable; import org.kiji.schema.testutil.AbstractKijiIntegrationTest; /** * Integration test for the job history table. */ public class IntegrationTestJobHistoryKijiTable extends AbstractKijiIntegrationTest { private static final Logger LOG = LoggerFactory.getLogger(IntegrationTestJobHistoryKijiTable.class); /** * Installs the job history table. */ @Before public final void setupIntegrationTestJobHistoryKijiTable() throws Exception { final Kiji kiji = Kiji.Factory.open(getKijiURI()); try { LOG.info("Table installing."); JobHistoryKijiTable.install(kiji); LOG.info("Table installed."); } finally { kiji.release(); } } /** * Test that makes sure the job history table is installed correctly and can be opened. */ @Test public void testInstallAndOpen() throws Exception { Kiji kiji = Kiji.Factory.open(getKijiURI()); // This will throw an IOException if there's difficulty opening the table final JobHistoryKijiTable jobHistory = JobHistoryKijiTable.open(kiji); jobHistory.close(); kiji.release(); } /** A private inner producer to test job recording. */ public static class EmailDomainProducer extends KijiProducer { /** {@inheritDoc} */ @Override public KijiDataRequest getDataRequest() { // We only need to read the most recent email address field from the user's row. return KijiDataRequest.create("info", "email"); } /** {@inheritDoc} */ @Override public String getOutputColumn() { return "derived:domain"; } /** {@inheritDoc} */ @Override public void produce(KijiRowData input, ProducerContext context) throws IOException { if (!input.containsColumn("info", "email")) { // This user doesn't have an email address. return; } String email = input.getMostRecentValue("info", "email").toString(); int atSymbol = email.indexOf("@"); if (atSymbol < 0) { // Couldn't find the '@' in the email address. Give up. return; } String domain = email.substring(atSymbol + 1); context.put(domain); } } /** * Test of all the basic information recorded by a mapper. */ @Test public void testMappers() throws Exception { createAndPopulateFooTable(); final Configuration jobConf = getConf(); // Set a value in the configuration. We'll check to be sure we can retrieve it later. jobConf.set("conf.test.animal.string", "squirrel"); final Kiji kiji = Kiji.Factory.open(getKijiURI()); final KijiTable fooTable = kiji.openTable("foo"); final JobHistoryKijiTable jobHistory = JobHistoryKijiTable.open(kiji); // Construct a Producer for this table. final KijiProduceJobBuilder builder = KijiProduceJobBuilder.create().withConf(jobConf) .withInputTable(fooTable).withProducer(EmailDomainProducer.class) .withOutput(new DirectKijiTableMapReduceJobOutput(fooTable)); MapReduceJob mrJob = builder.build(); // Record the jobId and run the job. String jobName = mrJob.getHadoopJob().getJobName(); LOG.info("About to run job: " + jobName); assertTrue(mrJob.run()); String jobId = mrJob.getHadoopJob().getJobID().toString(); LOG.info("Job was run with id: " + jobId); // Retrieve the recorded values and sanity test them. KijiRowData jobRecord = jobHistory.getJobDetails(jobId); assertTrue(jobRecord.containsColumn("info", "jobName")); assertEquals(jobRecord.getMostRecentValue("info", "jobName").toString(), jobName); assertTrue(jobRecord.containsColumn("info", "jobId")); assertEquals(jobRecord.getMostRecentValue("info", "jobId").toString(), jobId); assertTrue(jobRecord.containsColumn("info", "startTime")); assertTrue(jobRecord.containsColumn("info", "endTime")); assertTrue(jobRecord.<Long>getMostRecentValue("info", "startTime") < jobRecord .<Long>getMostRecentValue("info", "endTime")); // Check counters. We don't know the exact number of rows in the foo table, so just check if // it's greater than 0. assertTrue(jobRecord.containsColumn("info", "counters")); final String countersString = jobRecord.getMostRecentValue("info", "counters").toString(); final Pattern countersPattern = Pattern.compile("PRODUCER_ROWS_PROCESSED=(\\d+)"); final Matcher countersMatcher = countersPattern.matcher(countersString); assertTrue(countersMatcher.find()); assertTrue(Integer.parseInt(countersMatcher.group(1)) > 0); // Test to make sure the Configuration has the correct producer class, and records the value // we set previously. assertTrue(jobRecord.containsColumn("info", "configuration")); final String configString = jobRecord.getMostRecentValue("info", "configuration").toString(); final Configuration config = new Configuration(); config.addResource(new ByteArrayInputStream(configString.getBytes())); assertTrue(EmailDomainProducer.class == config.getClass(KijiConfKeys.KIJI_PRODUCER_CLASS, null)); assertEquals("Couldn't retrieve configuration field from deserialized configuration.", "squirrel", config.get("conf.test.animal.string")); fooTable.close(); jobHistory.close(); kiji.release(); } /** * Test that makes sure information is recorded correctly for a job run with .submit() instead * of .run(). Only checks timing info. */ @Test public void testSubmit() throws Exception { createAndPopulateFooTable(); final Kiji kiji = Kiji.Factory.open(getKijiURI()); final KijiTable fooTable = kiji.openTable("foo"); JobHistoryKijiTable jobHistory = JobHistoryKijiTable.open(kiji); // Construct a Producer for this table. KijiProduceJobBuilder builder = KijiProduceJobBuilder.create().withConf(getConf()).withInputTable(fooTable) .withProducer(EmailDomainProducer.class) .withOutput(new DirectKijiTableMapReduceJobOutput(fooTable)); MapReduceJob mrJob = builder.build(); LOG.info("About to submit job: " + mrJob.getHadoopJob().getJobName()); MapReduceJob.Status status = mrJob.submit(); while (!status.isComplete()) { Thread.sleep(1000L); } assertTrue(status.isSuccessful()); String jobId = mrJob.getHadoopJob().getJobID().toString(); LOG.info("Job successfully submitted and run. Id: " + jobId); // The job recording takes place in a separate thread, so sleep a bit to give it time to write // out. Thread.sleep(5000L); KijiRowData jobRecord = jobHistory.getJobDetails(jobId); assertTrue(jobRecord.containsColumn("info", "startTime")); assertTrue(jobRecord.containsColumn("info", "endTime")); assertTrue(jobRecord.<Long>getMostRecentValue("info", "startTime") < jobRecord .<Long>getMostRecentValue("info", "endTime")); fooTable.close(); jobHistory.close(); kiji.release(); } /** * Tests that a job will still run to completion even without an installed job history table. */ @Test public void testMissingHistoryTableNonfatal() throws Exception { createAndPopulateFooTable(); final Kiji kiji = Kiji.Factory.open(getKijiURI()); final KijiTable fooTable = kiji.openTable("foo"); kiji.deleteTable(JobHistoryKijiTable.getInstallName()); final KijiProduceJobBuilder builder = KijiProduceJobBuilder.create().withConf(getConf()) .withInputTable(fooTable).withProducer(EmailDomainProducer.class) .withOutput(new DirectKijiTableMapReduceJobOutput(fooTable)); final MapReduceJob mrJob = builder.build(); assertTrue(mrJob.run()); fooTable.close(); kiji.release(); } }