gobblin.compaction.event.CompactionSlaEventHelper.java Source code

Introduction

Here is the source code for gobblin.compaction.event.CompactionSlaEventHelper.java, a helper class that builds SLA event metadata for Gobblin compaction jobs.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.compaction.event;

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Optional;

import gobblin.compaction.dataset.Dataset;
import gobblin.compaction.mapreduce.MRCompactor;
import gobblin.compaction.mapreduce.avro.AvroKeyDedupReducer;
import gobblin.compaction.mapreduce.avro.AvroKeyMapper;
import gobblin.configuration.State;
import gobblin.metrics.event.sla.SlaEventKeys;
import gobblin.metrics.event.sla.SlaEventSubmitter;
import gobblin.metrics.event.sla.SlaEventSubmitter.SlaEventSubmitterBuilder;

/**
 * Helper class to build compaction SLA event metadata.
 */
public class CompactionSlaEventHelper {

    private static final Logger LOG = LoggerFactory.getLogger(CompactionSlaEventHelper.class);
    public static final String RECOMPATED_METADATA_NAME = "recompacted";
    public static final String DATASET_OUTPUT_PATH = "datasetOutputPath";
    public static final String LATE_RECORD_COUNT = "lateRecordCount";
    public static final String REGULAR_RECORD_COUNT = "regularRecordCount";
    public static final String NEED_RECOMPACT = "needRecompact";

    public static final String COMPACTION_COMPLETED_EVENT_NAME = "CompactionCompleted";
    public static final String COMPACTION_FAILED_EVENT_NAME = "CompactionFailed";
    public static final String COMPLETION_VERIFICATION_FAILED_EVENT_NAME = "CompletenessCannotBeVerified";
    public static final String COMPLETION_VERIFICATION_SUCCESS_EVENT_NAME = "CompletenessVerified";
    public static final String COMPACTION_RECORD_COUNT_EVENT = "CompactionRecordCounts";

    /**
 * Get an {@link SlaEventSubmitterBuilder} that has the dataset URN, partition, record count, previous publish
 * timestamp and dedupe status set.
 * The caller MUST set eventSubmitter and eventName before submitting.
     */
    public static SlaEventSubmitterBuilder getEventSubmitterBuilder(Dataset dataset, Optional<Job> job,
            FileSystem fs) {
        SlaEventSubmitterBuilder builder = SlaEventSubmitter.builder().datasetUrn(dataset.getUrn())
                .partition(dataset.jobProps().getProp(MRCompactor.COMPACTION_JOB_DEST_PARTITION, ""))
                .dedupeStatus(getOutputDedupeStatus(dataset.jobProps()));

        long previousPublishTime = getPreviousPublishTime(dataset, fs);
        long upstreamTime = dataset.jobProps().getPropAsLong(SlaEventKeys.UPSTREAM_TS_IN_MILLI_SECS_KEY, -1L);
        long recordCount = getRecordCount(job);

        // Previous publish time only exists when this is a recompaction job
        if (previousPublishTime != -1L) {
            builder.previousPublishTimestamp(Long.toString(previousPublishTime));
        }
        // Upstream time is the logical time represented by the compaction input directory
        if (upstreamTime != -1L) {
            builder.upstreamTimestamp(Long.toString(upstreamTime));
        }
        if (recordCount != -1L) {
            builder.recordCount(Long.toString(recordCount));
        }
        return builder;
    }

    /**
     * @deprecated Use {@link #getEventSubmitterBuilder(Dataset, Optional, FileSystem)} instead.
     */
    @Deprecated
    public static void populateState(Dataset dataset, Optional<Job> job, FileSystem fs) {
        dataset.jobProps().setProp(SlaEventKeys.DATASET_URN_KEY, dataset.getUrn());
        dataset.jobProps().setProp(SlaEventKeys.PARTITION_KEY,
                dataset.jobProps().getProp(MRCompactor.COMPACTION_JOB_DEST_PARTITION, ""));
        dataset.jobProps().setProp(SlaEventKeys.DEDUPE_STATUS_KEY, getOutputDedupeStatus(dataset.jobProps()));
        dataset.jobProps().setProp(SlaEventKeys.PREVIOUS_PUBLISH_TS_IN_MILLI_SECS_KEY,
                getPreviousPublishTime(dataset, fs));
        dataset.jobProps().setProp(SlaEventKeys.RECORD_COUNT_KEY, getRecordCount(job));
    }

    public static void setUpstreamTimeStamp(State state, long time) {
        state.setProp(SlaEventKeys.UPSTREAM_TS_IN_MILLI_SECS_KEY, Long.toString(time));
    }

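    /**
     * Returns the modification time of the compaction complete file under the dataset output path, or -1 if the
     * file does not exist or cannot be read (i.e. this is not a recompaction).
     */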
    private static long getPreviousPublishTime(Dataset dataset, FileSystem fs) {
        Path compactionCompletePath = new Path(dataset.outputPath(), MRCompactor.COMPACTION_COMPLETE_FILE_NAME);
        try {
            return fs.getFileStatus(compactionCompletePath).getModificationTime();
        } catch (IOException e) {
            LOG.debug("Failed to get previous publish time.", e);
        }
        return -1L;
    }

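    /**
     * Returns {@link DedupeStatus#DEDUPED} or {@link DedupeStatus#NOT_DEDUPED} depending on whether compaction
     * output deduplication is enabled in the given {@link State}.
     */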
    private static String getOutputDedupeStatus(State state) {
        return state.getPropAsBoolean(MRCompactor.COMPACTION_OUTPUT_DEDUPLICATED,
                MRCompactor.DEFAULT_COMPACTION_OUTPUT_DEDUPLICATED) ? DedupeStatus.DEDUPED.toString()
                        : DedupeStatus.NOT_DEDUPED.toString();
    }

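    /**
     * Reads the output record count from the job counters, checking the reducer counter first and falling back to
     * the mapper counter. Returns -1 if the job is absent, counters cannot be read, or no non-zero count is found.
     */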
    private static long getRecordCount(Optional<Job> job) {

        if (!job.isPresent()) {
            return -1L;
        }

        Counters counters = null;
        try {
            counters = job.get().getCounters();
        } catch (IOException e) {
            LOG.debug("Failed to get job counters. Record count will not be set. ", e);
            return -1l;
        }

        Counter recordCounter = counters.findCounter(AvroKeyDedupReducer.EVENT_COUNTER.RECORD_COUNT);

        if (recordCounter != null && recordCounter.getValue() != 0) {
            return recordCounter.getValue();
        }

        recordCounter = counters.findCounter(AvroKeyMapper.EVENT_COUNTER.RECORD_COUNT);

        if (recordCounter != null && recordCounter.getValue() != 0) {
            return recordCounter.getValue();
        }

        LOG.debug("Non zero record count not found in both mapper and reducer counters");

        return -1l;
    }
}
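
Example

A minimal usage sketch of the builder returned by getEventSubmitterBuilder. This is not part of the original
class: the dataset, job, fs and eventSubmitter arguments are assumed to be supplied by the surrounding
compaction flow, with eventSubmitter being an existing gobblin.metrics.event.EventSubmitter. As the Javadoc
above notes, eventSubmitter and eventName must be set before submitting.

    // Hypothetical caller, sketched under the assumptions above.
    void submitCompactionCompletedEvent(Dataset dataset, Job job, FileSystem fs, EventSubmitter eventSubmitter) {
        SlaEventSubmitter.SlaEventSubmitterBuilder builder =
                CompactionSlaEventHelper.getEventSubmitterBuilder(dataset, Optional.of(job), fs);

        builder.eventSubmitter(eventSubmitter) // required before submitting
                .eventName(CompactionSlaEventHelper.COMPACTION_COMPLETED_EVENT_NAME) // required before submitting
                .build()
                .submit();
    }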