org.kiji.mapreduce.framework.JobHistoryKijiTable.java Source code

Java tutorial

Introduction

Here is the source code for org.kiji.mapreduce.framework.JobHistoryKijiTable.java

Source

/**
 * (c) Copyright 2012 WibiData, Inc.
 *
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.kiji.mapreduce.framework;

import java.io.Closeable;
import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import java.util.NavigableMap;

import com.google.common.collect.Maps;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;

import org.kiji.annotations.ApiAudience;
import org.kiji.annotations.ApiStability;
import org.kiji.mapreduce.avro.generated.JobHistoryEntry;
import org.kiji.schema.AtomicKijiPutter;
import org.kiji.schema.EntityId;
import org.kiji.schema.Kiji;
import org.kiji.schema.KijiDataRequest;
import org.kiji.schema.KijiDataRequestBuilder;
import org.kiji.schema.KijiRowData;
import org.kiji.schema.KijiRowScanner;
import org.kiji.schema.KijiTable;
import org.kiji.schema.KijiTableReader;
import org.kiji.schema.layout.KijiTableLayout;

/**
 * A class providing an API to install and access the job history kiji table.
 *
 * <p>Used in places like KijiMapReduceJob to record information about jobs run through Kiji.
 * Each job is stored in the row whose entity ID is built from the job ID; every cell of a job
 * is written at the job's start time as its timestamp.</p>
 *
 * <p>Instances are obtained through {@link #open(Kiji)} and must be released with
 * {@link #close()}.</p>
 */
@ApiAudience.Framework
@ApiStability.Evolving
public final class JobHistoryKijiTable implements Closeable {
    /** Every existing job history table has at least this version. */
    private static final String PREV_TABLE_LAYOUT_VERSION = "1";
    /** The name of the table storing a history of completed jobs. */
    private static final String TABLE_NAME = "job_history";
    /** The path to the layout for the table in our resources. */
    private static final String TABLE_LAYOUT_RESOURCE = "/org/kiji/mapreduce/job-history-layout.json";
    /** JSON file for job history table that adds job counters family. */
    private static final String TABLE_LAYOUT_V2 = "/org/kiji/mapreduce/job-history-layout-v2-counterfamily.json";

    /** Column family where job history information is stored. */
    public static final String JOB_HISTORY_FAMILY = "info";
    /** Column family for job counters. */
    public static final String JOB_HISTORY_COUNTERS_FAMILY = "counters";
    /** Column family where extended information is stored. */
    public static final String JOB_HISTORY_EXTENDED_INFO_FAMILY = "extendedInfo";
    /** Qualifier where job IDs are stored. */
    public static final String JOB_HISTORY_ID_QUALIFIER = "jobId";
    /** Qualifier where job names are stored. */
    public static final String JOB_HISTORY_NAME_QUALIFIER = "jobName";
    /** Qualifier where job start times are stored. */
    public static final String JOB_HISTORY_START_TIME_QUALIFIER = "startTime";
    /** Qualifier where job end times are stored. */
    public static final String JOB_HISTORY_END_TIME_QUALIFIER = "endTime";
    /** Qualifier where job end statuses are stored. */
    public static final String JOB_HISTORY_END_STATUS_QUALIFIER = "jobEndStatus";
    /** Qualifier where job counters are stored. */
    public static final String JOB_HISTORY_COUNTERS_QUALIFIER = "counters";
    /** Qualifier where job configurations are stored. */
    public static final String JOB_HISTORY_CONFIGURATION_QUALIFIER = "configuration";
    /** Value stored to configuration qualifier if the job did not have a configuration. */
    public static final String JOB_HISTORY_NO_CONFIGURATION_VALUE = "No configuration for job.";

    /** Message to record into the {@value #JOB_HISTORY_END_STATUS_QUALIFIER} on job success. */
    public static final String SUCCEEDED = "SUCCEEDED";
    /** Message to record into the {@value #JOB_HISTORY_END_STATUS_QUALIFIER} on job failure. */
    public static final String FAILED = "FAILED";

    /** The KijiTable managed by the JobHistoryKijiTable; released in {@link #close()}. */
    private final KijiTable mKijiTable;

    /**
     * Opens a JobHistoryKijiTable for a given kiji, installing it if necessary. This method should
     * be matched with a call to {@link #close}.
     *
     * @param kiji The kiji instance to use.
     * @return An opened JobHistoryKijiTable.
     * @throws IOException If there is an error opening the table.
     */
    public static JobHistoryKijiTable open(Kiji kiji) throws IOException {
        return new JobHistoryKijiTable(kiji);
    }

    /**
     * Returns the default name of the job history table.
     *
     * @return The name of the job history table as used by the installer.
     */
    public static String getInstallName() {
        return TABLE_NAME;
    }

    /**
     * Extract the counters from a Job.
     *
     * <p>If the job reports no counters at all (Hadoop documents that {@code Job.getCounters()}
     * may return null, for instance for a retired job), an empty map is returned so the job can
     * still be recorded.</p>
     *
     * @param job Job from which to get counters.
     * @return a map from counters to their counts. Keys are group:name. Never null.
     * @throws IOException in case of an error getting the counters.
     */
    private static Map<String, Long> getCounters(final Job job) throws IOException {
        final Map<String, Long> countersMap = Maps.newHashMap();
        final Counters counters = job.getCounters();
        if (null == counters) {
            // Nothing to extract; do not fail the whole job record over missing counters.
            return countersMap;
        }
        for (String group : counters.getGroupNames()) {
            for (Counter counter : counters.getGroup(group)) {
                countersMap.put(String.format("%s:%s", group, counter.getName()), counter.getValue());
            }
        }
        return countersMap;
    }

    /**
     * Add counters to an outstanding atomic transaction on the given atomic putter.
     *
     * <p>Each counter becomes one cell in the {@value #JOB_HISTORY_COUNTERS_FAMILY} family, using
     * the counter key as qualifier and the job start time as timestamp.</p>
     *
     * @param putter atomic putter with an open transaction.
     * @param startTime time in milliseconds since the epoch at which the job started.
     * @param counters map of counters from the job. Keys should be of the form 'group:name'.
     * @throws IOException in case of an error adding the counters to the transaction.
     */
    private static void writeCounters(final AtomicKijiPutter putter, final long startTime,
            final Map<String, Long> counters) throws IOException {
        for (Map.Entry<String, Long> counterEntry : counters.entrySet()) {
            putter.put(JOB_HISTORY_COUNTERS_FAMILY, counterEntry.getKey(), startTime, counterEntry.getValue());
        }
    }

    /**
     * Add extended information to an outstanding atomic transaction on the given atomic putter.
     *
     * <p>Each entry becomes one cell in the {@value #JOB_HISTORY_EXTENDED_INFO_FAMILY} family,
     * using the entry key as qualifier and the job start time as timestamp.</p>
     *
     * @param putter atomic putter with an open transaction.
     * @param startTime time in milliseconds since the epoch at which the job started.
     * @param extendedInfo map of additional information about the job.
     * @throws IOException in case of an error adding the extended info to the transaction.
     */
    private static void writeExtendedInfo(final AtomicKijiPutter putter, final long startTime,
            final Map<String, String> extendedInfo) throws IOException {
        for (Map.Entry<String, String> infoEntry : extendedInfo.entrySet()) {
            putter.put(JOB_HISTORY_EXTENDED_INFO_FAMILY, infoEntry.getKey(), startTime, infoEntry.getValue());
        }
    }

    /**
     * Reads the most recent value of a cell in the {@value #JOB_HISTORY_FAMILY} family, failing
     * with a descriptive error when the cell is absent (typically because no job with the given
     * ID was ever recorded).
     *
     * @param data row data read for the job.
     * @param qualifier qualifier of the required cell within the info family.
     * @param jobId ID of the job the row was read for; only used in the error message.
     * @param <T> expected type of the cell value.
     * @return the most recent value of the cell; never null.
     * @throws IOException if the row holds no value for the requested cell.
     */
    private static <T> T getRequiredInfo(final KijiRowData data, final String qualifier,
            final String jobId) throws IOException {
        final T value = data.getMostRecentValue(JOB_HISTORY_FAMILY, qualifier);
        if (null == value) {
            throw new IOException(String.format(
                    "No job history found for job ID '%s': column %s:%s has no value in table '%s'.",
                    jobId, JOB_HISTORY_FAMILY, qualifier, TABLE_NAME));
        }
        return value;
    }

    /**
     * Private constructor that opens a new JobHistoryKijiTable, creating it if necessary.
     * This method also updates an existing layout to the latest layout for the job
     * history table.
     *
     * @param kiji The kiji instance to retrieve the job history table from.
     * @throws IOException If there's an error installing or opening the underlying table.
     */
    private JobHistoryKijiTable(Kiji kiji) throws IOException {
        install(kiji);
        mKijiTable = kiji.openTable(TABLE_NAME);
    }

    /**
     * Writes a job into the JobHistoryKijiTable.
     *
     * <p>The job ID, name, success flag, configuration and counters are read from the job itself;
     * no extended information is recorded.</p>
     *
     * @param job The job to save.
     * @param startTime The time the job began, in milliseconds.
     * @param endTime The time the job ended, in milliseconds
     * @throws IOException If there is an error writing to the table.
     */
    public void recordJob(final Job job, final long startTime, final long endTime) throws IOException {
        recordJob(job.getJobID().toString(), job.getJobName(), startTime, endTime, job.isSuccessful(),
                job.getConfiguration(), getCounters(job), Collections.<String, String>emptyMap());
    }

    /**
     * Writes details of a job into the JobHistoryKijiTable.
     *
     * <p>All cells are written in a single atomic transaction on the row of the job, using
     * {@code startTime} as the timestamp of every cell. Counters are stored twice: as one string
     * (the map's {@code toString()}) in the info family, and as one cell per counter in the
     * counters family.</p>
     *
     * @param jobId unique identifier for the job.
     * @param jobName name of the job.
     * @param startTime time in milliseconds since the epoch at which the job started.
     * @param endTime time in milliseconds since the epoch at which the job ended.
     * @param jobSuccess whether the job completed successfully.
     * @param conf Configuration of the job. May be null, in which case
     *     {@link #JOB_HISTORY_NO_CONFIGURATION_VALUE} is recorded instead.
     * @param counters map of counters from the job. Keys should be of the form 'group:name'.
     * @param extendedInfo any additional information which should be stored about the job.
     * @throws IOException in case of an error writing to the table.
     */
    // CSOFF: ParameterNumberCheck
    public void recordJob(final String jobId, final String jobName, final long startTime, final long endTime,
            final boolean jobSuccess, final Configuration conf, final Map<String, Long> counters,
            final Map<String, String> extendedInfo) throws IOException {
        // CSON: ParameterNumberCheck
        final EntityId eid = mKijiTable.getEntityId(jobId);
        final AtomicKijiPutter putter = mKijiTable.getWriterFactory().openAtomicPutter();
        try {
            putter.begin(eid);
            putter.put(JOB_HISTORY_FAMILY, JOB_HISTORY_ID_QUALIFIER, startTime, jobId);
            putter.put(JOB_HISTORY_FAMILY, JOB_HISTORY_NAME_QUALIFIER, startTime, jobName);
            putter.put(JOB_HISTORY_FAMILY, JOB_HISTORY_START_TIME_QUALIFIER, startTime, startTime);
            putter.put(JOB_HISTORY_FAMILY, JOB_HISTORY_END_TIME_QUALIFIER, startTime, endTime);
            putter.put(JOB_HISTORY_FAMILY, JOB_HISTORY_END_STATUS_QUALIFIER, startTime,
                    (jobSuccess) ? SUCCEEDED : FAILED);
            putter.put(JOB_HISTORY_FAMILY, JOB_HISTORY_COUNTERS_QUALIFIER, startTime, counters.toString());
            if (null != conf) {
                // Serialize the configuration as XML text so it can be stored in a string cell.
                final ByteArrayOutputStream baos = new ByteArrayOutputStream();
                conf.writeXml(baos);
                putter.put(JOB_HISTORY_FAMILY, JOB_HISTORY_CONFIGURATION_QUALIFIER, startTime,
                        baos.toString("UTF-8"));
            } else {
                putter.put(JOB_HISTORY_FAMILY, JOB_HISTORY_CONFIGURATION_QUALIFIER, startTime,
                        JOB_HISTORY_NO_CONFIGURATION_VALUE);
            }
            writeCounters(putter, startTime, counters);
            writeExtendedInfo(putter, startTime, extendedInfo);
            putter.commit();
        } finally {
            putter.close();
        }
    }

    /**
     * Install the job history table into a Kiji instance. This should be called only
     * via open, because we might want to update the layout of the job history table.
     *
     * <p>The table is created from the original layout when it does not exist yet. Whenever the
     * table's layout ID equals the previous layout version, the layout is then upgraded to the
     * version that adds the counters family.</p>
     *
     * @param kiji The Kiji instance to install this table in.
     * @throws IOException If there is an error.
     */
    private static void install(Kiji kiji) throws IOException {
        if (!kiji.getTableNames().contains(TABLE_NAME)) {
            // Try to install the job history table if necessary.
            kiji.createTable(KijiTableLayout.createFromEffectiveJsonResource(TABLE_LAYOUT_RESOURCE).getDesc());
        }
        // At this point, we either have an existing table or we just installed a new
        // one. Check if the table is using the old layout, and update it if it is.
        if (kiji.getMetaTable().getTableLayout(TABLE_NAME).getDesc().getLayoutId()
                .equals(PREV_TABLE_LAYOUT_VERSION)) {
            KijiTableLayout ktl = KijiTableLayout.createFromEffectiveJsonResource(TABLE_LAYOUT_V2);
            kiji.modifyTableLayout(ktl.getDesc());
        }
        // If there are further updates to the job history layout, they should probably be added here.
    }

    /**
     * Get the saved information for a particular JobID.
     *
     * @param jobId The id of the job to retrieve.
     * @return A JobHistoryEntry containing all the information recorded for the requested job.
     * @throws IOException If there is an IO error retrieving the data, or if the table holds no
     *     complete record for the requested job.
     */
    public JobHistoryEntry getJobDetails(String jobId) throws IOException {
        final KijiDataRequestBuilder builder = KijiDataRequest.builder();
        builder.newColumnsDef()
                .addFamily(JOB_HISTORY_FAMILY)
                .addFamily(JOB_HISTORY_COUNTERS_FAMILY)
                .addFamily(JOB_HISTORY_EXTENDED_INFO_FAMILY);
        final KijiDataRequest request = builder.build();
        final KijiRowData data;
        final KijiTableReader reader = mKijiTable.openTableReader();
        try {
            data = reader.get(mKijiTable.getEntityId(jobId), request);
        } finally {
            reader.close();
        }

        // We have to pull out the maps here to get around a pickiness for the Java compiler because
        // getMostRecentValues returns a generic type, which causes a compile error while passing to
        // setExtendedInfo below.
        NavigableMap<String, String> tempExtMap = data.getMostRecentValues(JOB_HISTORY_EXTENDED_INFO_FAMILY);
        NavigableMap<String, Long> tempCounterMap = data.getMostRecentValues(JOB_HISTORY_COUNTERS_FAMILY);

        // Every info cell is required: a missing one is reported as an IOException naming the job
        // and the column rather than surfacing as a bare NullPointerException.
        return JobHistoryEntry.newBuilder()
                .setJobId(getRequiredInfo(data, JOB_HISTORY_ID_QUALIFIER, jobId).toString())
                .setJobName(getRequiredInfo(data, JOB_HISTORY_NAME_QUALIFIER, jobId).toString())
                .setJobStartTime(
                        JobHistoryKijiTable.<Long>getRequiredInfo(data, JOB_HISTORY_START_TIME_QUALIFIER, jobId))
                .setJobEndTime(
                        JobHistoryKijiTable.<Long>getRequiredInfo(data, JOB_HISTORY_END_TIME_QUALIFIER, jobId))
                .setJobEndStatus(getRequiredInfo(data, JOB_HISTORY_END_STATUS_QUALIFIER, jobId).toString())
                .setJobCounters(getRequiredInfo(data, JOB_HISTORY_COUNTERS_QUALIFIER, jobId).toString())
                .setJobConfiguration(
                        getRequiredInfo(data, JOB_HISTORY_CONFIGURATION_QUALIFIER, jobId).toString())
                .setExtendedInfo(tempExtMap).setCountersFamily(tempCounterMap).build();
    }

    /**
     * Get the saved information for all JobIDs.
     *
     * <p>Only the {@value #JOB_HISTORY_FAMILY} family is requested; the counters and extended
     * info families are not part of the scan.</p>
     *
     * @return A KijiRowScanner containing details for all the JobIDs.
     * @throws IOException If there is an IO error retrieving the data.
     */
    public KijiRowScanner getJobScanner() throws IOException {
        KijiDataRequest wdr = KijiDataRequest.create(JOB_HISTORY_FAMILY);

        KijiTableReader wtr = mKijiTable.openTableReader();
        try {
            // NOTE(review): the reader is closed before the caller consumes the scanner; this
            // presumes a scanner stays usable after its reader is closed - confirm against the
            // KijiTableReader contract. The caller is presumably responsible for closing the
            // returned scanner.
            return wtr.getScanner(wdr);
        } finally {
            wtr.close();
        }
    }

    /**
     * Releases the underlying job history table.
     *
     * @throws IOException If there is an error releasing the table.
     */
    @Override
    public void close() throws IOException {
        mKijiTable.release();
    }
}