gobblin.configuration.SourceState.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.configuration.SourceState.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.configuration;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;

import com.google.common.base.Strings;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;

import gobblin.source.workunit.WorkUnit;
import gobblin.source.workunit.Extract;

import lombok.Getter;

/**
 * A container for all meta data related to a particular source. This includes all properties
 * defined in job configuration files and all properties from tasks of the previous run.
 *
 * <p>
 *   Properties can be overwritten at runtime and persisted upon job completion. Persisted
 *   properties will be loaded in the next run and made available to use by the
 *   {@link gobblin.source.Source}.
 * </p>
 *
 * @author kgoodhop
 */
public class SourceState extends State {

    /**
     * Registry of every {@link Extract} handed out by {@link #createExtract}. The set is static, so it is
     * shared by all {@link SourceState} instances and is what keeps the returned extracts distinct.
     *
     * <p>
     *   NOTE(review): nothing in this class ever removes entries, so the set only grows.
     * </p>
     */
    private static final Set<Extract> EXTRACT_SET = Sets.newConcurrentHashSet();

    /** Formatter used for extract IDs: pattern {@code yyyyMMddHHmmss}, US locale, UTC time zone. */
    private static final DateTimeFormatter DTF = DateTimeFormat.forPattern("yyyyMMddHHmmss").withLocale(Locale.US)
            .withZone(DateTimeZone.UTC);

    /** Immutable map from dataset URN to the dataset state of the previous job run. */
    @Getter
    private final Map<String, SourceState> previousDatasetStatesByUrns;

    /**
     * {@link WorkUnitState}s of the previous job run, each wrapped in an {@link ImmutableWorkUnitState}.
     * The generated getter returns this list itself, not a copy.
     */
    @Getter
    private final List<WorkUnitState> previousWorkUnitStates = Lists.newArrayList();

    /**
     * Default constructor. Creates a state with no previous dataset states and no previous work unit states.
     */
    public SourceState() {
        this.previousDatasetStatesByUrns = ImmutableMap.of();
    }

    /**
     * Constructor for jobs that have no per-dataset previous state.
     *
     * @param properties job configuration properties
     * @param previousWorkUnitStates an {@link Iterable} of {@link WorkUnitState}s of the previous job run
     */
    public SourceState(State properties, Iterable<WorkUnitState> previousWorkUnitStates) {
        // NOTE(review): this overload copies via addAll(State) while the three-argument constructor uses
        // addAll(Properties); left as is because the two overloads are not visible from here.
        super.addAll(properties);
        this.previousDatasetStatesByUrns = ImmutableMap.of();
        addPreviousWorkUnitStates(previousWorkUnitStates);
    }

    /**
     * Constructor.
     *
     * @param properties job configuration properties
     * @param previousDatasetStatesByUrns {@link SourceState} of the previous job run, keyed by dataset URN;
     *                                    the map is copied, so later changes to the argument are not seen
     * @param previousWorkUnitStates an {@link Iterable} of {@link WorkUnitState}s of the previous job run
     */
    public SourceState(State properties, Map<String, ? extends SourceState> previousDatasetStatesByUrns,
            Iterable<WorkUnitState> previousWorkUnitStates) {
        super.addAll(properties.getProperties());
        this.previousDatasetStatesByUrns = ImmutableMap.copyOf(previousDatasetStatesByUrns);
        addPreviousWorkUnitStates(previousWorkUnitStates);
    }

    /**
     * Wrap each given {@link WorkUnitState} in an {@link ImmutableWorkUnitState} and append it to
     * {@link #previousWorkUnitStates}.
     */
    private void addPreviousWorkUnitStates(Iterable<WorkUnitState> workUnitStates) {
        for (WorkUnitState workUnitState : workUnitStates) {
            this.previousWorkUnitStates.add(new ImmutableWorkUnitState(workUnitState));
        }
    }

    /**
     * Get the {@link SourceState} of the previous job run.
     *
     * <p>
     *   This is a convenience method for existing jobs that do not use the feature that allows output data to
     *   be committed on a per-dataset basis. Use of this method assumes that the job deals with a single dataset,
     *   which uses the default dataset URN defined by {@link ConfigurationKeys#DEFAULT_DATASET_URN}.
     * </p>
     *
     * @return {@link SourceState} of the previous job run or {@code null} if no previous {@link SourceState} is found
     */
    public SourceState getPreviousSourceState() {
        return getPreviousDatasetState(ConfigurationKeys.DEFAULT_DATASET_URN);
    }

    /**
     * Get the state (in the form of a {@link SourceState}) of a dataset identified by a dataset URN
     * of the previous job run.
     *
     * <p>
     *   Each call returns a fresh read-only copy of the stored state.
     * </p>
     *
     * @param datasetUrn the dataset URN
     * @return the dataset state (in the form of a {@link SourceState}) of the previous job run
     *         or {@code null} if no previous dataset state is found for the given dataset URN
     */
    public SourceState getPreviousDatasetState(String datasetUrn) {
        // The map is always an ImmutableMap (see the constructors), which never holds null values,
        // so a null result means "no entry" and one lookup is enough.
        SourceState previousDatasetState = this.previousDatasetStatesByUrns.get(datasetUrn);
        if (previousDatasetState == null) {
            return null;
        }
        return new ImmutableSourceState(previousDatasetState);
    }

    /**
     * Get a {@link Map} from dataset URNs (as being specified by {@link ConfigurationKeys#DATASET_URN_KEY})
     * to the {@link WorkUnitState}s with the dataset URNs.
     *
     * <p>
     *   {@link WorkUnitState}s that do not have {@link ConfigurationKeys#DATASET_URN_KEY} set will be added
     *   to the dataset state belonging to {@link ConfigurationKeys#DEFAULT_DATASET_URN}.
     * </p>
     *
     * @return an immutable {@link Map} from dataset URNs to the {@link WorkUnitState}s with the dataset URNs
     */
    public Map<String, Iterable<WorkUnitState>> getPreviousWorkUnitStatesByDatasetUrns() {
        // Group into concrete lists so that no down-cast from Iterable is needed while appending.
        Map<String, List<WorkUnitState>> statesByDatasetUrn = Maps.newHashMap();

        for (WorkUnitState workUnitState : this.previousWorkUnitStates) {
            String datasetUrn = workUnitState.getProp(ConfigurationKeys.DATASET_URN_KEY,
                    ConfigurationKeys.DEFAULT_DATASET_URN);
            List<WorkUnitState> statesOfDataset = statesByDatasetUrn.get(datasetUrn);
            if (statesOfDataset == null) {
                statesOfDataset = Lists.newArrayList();
                statesByDatasetUrn.put(datasetUrn, statesOfDataset);
            }
            statesOfDataset.add(workUnitState);
        }

        // The explicit type witness widens the value type from List to Iterable for the declared return type.
        return ImmutableMap.<String, Iterable<WorkUnitState>>copyOf(statesByDatasetUrn);
    }

    /**
     * Create a new properly populated {@link Extract} instance.
     *
     * <p>
     *   This method should always return a new unique {@link Extract} instance. If an equal {@link Extract}
     *   has already been handed out, the extract ID is set to the current time (when it is empty) or moved
     *   forward by one second (when it is already set) until the instance is distinct from all previous ones.
     *   This relies on {@link Extract} equality taking the extract ID into account.
     * </p>
     *
     * @param type {@link gobblin.source.workunit.Extract.TableType}
     * @param namespace namespace of the table this extract belongs to
     * @param table name of the table this extract belongs to
     * @return a new unique {@link Extract} instance
     *
     * @deprecated Use {@link gobblin.source.extractor.extract.AbstractSource#createExtract(
     * gobblin.source.workunit.Extract.TableType, String, String)}
     */
    @Deprecated
    public synchronized Extract createExtract(Extract.TableType type, String namespace, String table) {
        Extract extract = new Extract(this, type, namespace, table);
        // The registry is static but "synchronized" only locks this instance, so a separate
        // contains()-then-add() could let two SourceState instances register equal extracts.
        // Set#add on the concurrent set tests and inserts in one atomic step; a false return
        // means an equal extract is already registered and the ID has to be bumped.
        while (!EXTRACT_SET.add(extract)) {
            if (Strings.isNullOrEmpty(extract.getExtractId())) {
                extract.setExtractId(DTF.print(new DateTime()));
            } else {
                // An existing ID is expected to be in the DTF pattern; parseDateTime rejects anything else.
                DateTime extractDateTime = DTF.parseDateTime(extract.getExtractId());
                extract.setExtractId(DTF.print(extractDateTime.plusSeconds(1)));
            }
        }
        return extract;
    }

    /**
     * Create a new {@link WorkUnit} instance from a given {@link Extract}.
     *
     * @param extract given {@link Extract}
     * @return a new {@link WorkUnit} instance
     *
     * @deprecated Properties in SourceState should not be added to a WorkUnit. Having each WorkUnit contain a
     * copy of SourceState is a waste of memory. Use {@link WorkUnit#create(Extract)}.
     */
    @Deprecated
    public WorkUnit createWorkUnit(Extract extract) {
        return new WorkUnit(this, extract);
    }

    /**
     * Serialize this state: the number of previous work unit states, then each of them, then the
     * fields written by {@link State#write(DataOutput)}.
     *
     * <p>
     *   The previous dataset states are not written.
     * </p>
     *
     * @param out the {@link DataOutput} to write to
     * @throws IOException if writing to {@code out} fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(this.previousWorkUnitStates.size());
        for (WorkUnitState state : this.previousWorkUnitStates) {
            state.write(out);
        }
        super.write(out);
    }

    /**
     * Deserialize the fields written by {@link #write(DataOutput)}, in the same order.
     *
     * <p>
     *   NOTE(review): the work unit states read here are appended to the existing list, which is not
     *   cleared first. Confirm that instances are never reused across calls.
     * </p>
     *
     * @param in the {@link DataInput} to read from
     * @throws IOException if reading from {@code in} fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        int size = in.readInt();
        for (int i = 0; i < size; i++) {
            WorkUnitState workUnitState = new WorkUnitState();
            workUnitState.readFields(in);
            this.previousWorkUnitStates.add(new ImmutableWorkUnitState(workUnitState));
        }
        super.readFields(in);
    }

    /**
     * Two {@link SourceState}s are equal when their {@link State} parts, their previous dataset states
     * and their previous work unit states are all equal.
     */
    @Override
    public boolean equals(Object object) {
        if (!(object instanceof SourceState)) {
            return false;
        }

        SourceState other = (SourceState) object;
        return super.equals(other) && this.previousDatasetStatesByUrns.equals(other.previousDatasetStatesByUrns)
                && this.previousWorkUnitStates.equals(other.previousWorkUnitStates);
    }

    /**
     * Hash code combining the same three parts that {@link #equals(Object)} compares.
     */
    @Override
    public int hashCode() {
        final int prime = 31;
        int result = super.hashCode();
        result = prime * result + this.previousDatasetStatesByUrns.hashCode();
        result = prime * result + this.previousWorkUnitStates.hashCode();
        return result;
    }

    /**
     * An immutable version of {@link SourceState} that disables the methods overridden below, which
     * would otherwise change the internal state of a {@link SourceState}, by making them throw
     * {@link UnsupportedOperationException}.
     */
    private static class ImmutableSourceState extends SourceState {

        /**
         * Copy the properties, previous dataset states and previous work unit states of the given state.
         *
         * @param sourceState the {@link SourceState} to copy
         */
        public ImmutableSourceState(SourceState sourceState) {
            super(sourceState, sourceState.previousDatasetStatesByUrns, sourceState.previousWorkUnitStates);
        }

        @Override
        public void readFields(DataInput in) throws IOException {
            throw new UnsupportedOperationException();
        }

        @Override
        public void setId(String id) {
            throw new UnsupportedOperationException();
        }

        @Override
        public void setProp(String key, Object value) {
            throw new UnsupportedOperationException();
        }

        @Override
        public synchronized void appendToListProp(String key, String value) {
            throw new UnsupportedOperationException();
        }

        @Override
        public void addAll(State otherState) {
            throw new UnsupportedOperationException();
        }

        @Override
        public void addAll(Properties properties) {
            throw new UnsupportedOperationException();
        }
    }
}