gobblin.metastore.ZkStateStore.java Source code

Java tutorial


Here is the source code for gobblin.metastore.ZkStateStore.java


/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.metastore;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import org.I0Itec.zkclient.serialize.ZkSerializer;
import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.io.Text;
import org.apache.helix.AccessOption;
import org.apache.helix.manager.zk.ByteArraySerializer;
import org.apache.helix.store.HelixPropertyStore;
import org.apache.helix.store.zk.ZkHelixPropertyStore;

import com.google.common.base.Predicate;
import com.google.common.base.Predicates;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;

import gobblin.configuration.State;
import gobblin.util.io.StreamUtils;

/**
 * An implementation of {@link StateStore} backed by ZooKeeper.
 * <p>
 *     This implementation stores serialized {@link State}s as a blob in ZooKeeper in the Sequence file format.
 *     The ZK path is in the format /STORE_ROOT_DIR/STORE_NAME/TABLE_NAME.
 *     State keys are state IDs (see {@link State#getId()}), and values are objects of {@link State} or
 *     any of its extensions. Keys will be empty strings if state IDs are not set
 *     (i.e., {@link State#getId()} returns <em>null</em>). In this case, the
 *     {@link ZkStateStore#get(String, String, String)} method may not work.
 * </p>
 * @param <T> state object type
 */
public class ZkStateStore<T extends State> implements StateStore<T> {

    // Class of the state objects to be put into the store
    private final Class<T> stateClass;
    // ZooKeeper-backed property store that holds the serialized states as raw byte arrays
    private final HelixPropertyStore<byte[]> propStore;
    // When true, putAll wraps its output in a GZIPOutputStream before the bytes are stored
    private final boolean compressedValues;

    /**
     * State store that stores instances of {@link State}s in a ZooKeeper-backed {@link HelixPropertyStore}.
     * storeRootDir will be created when the first entry is written if it does not exist.
     * @param connectString ZooKeeper connect string
     * @param storeRootDir The root directory for the state store
     * @param compressedValues should values be compressed for storage?
     * @param stateClass The type of state being stored
     * @throws IOException
     */
    public ZkStateStore(String connectString, String storeRootDir, boolean compressedValues, Class<T> stateClass)
            throws IOException {
        this.compressedValues = compressedValues;
        this.stateClass = stateClass;

        // The property store is typed on byte[], so it is given the byte-array serializer.
        ZkSerializer serializer = new ByteArraySerializer();
        this.propStore = new ZkHelixPropertyStore<byte[]>(connectString, serializer, storeRootDir);
    }

    private String formPath(String storeName) {
        return "/" + storeName;

    private String formPath(String storeName, String tableName) {
        return "/" + storeName + "/" + tableName;

    public boolean create(String storeName) throws IOException {
        String path = formPath(storeName);

        return propStore.exists(path, 0)
                || propStore.create(path, ArrayUtils.EMPTY_BYTE_ARRAY, AccessOption.PERSISTENT);

    public boolean create(String storeName, String tableName) throws IOException {
        String path = formPath(storeName, tableName);

        if (propStore.exists(path, 0)) {
            throw new IOException(
                    String.format("State already exists for storeName %s tableName %s", storeName, tableName));

        return propStore.create(path, ArrayUtils.EMPTY_BYTE_ARRAY, AccessOption.PERSISTENT);

    public boolean exists(String storeName, String tableName) throws IOException {
        String path = formPath(storeName, tableName);

        return propStore.exists(path, 0);

    /**
     * Serializes the state to the {@link DataOutput}.
     * @param dataOutput output target receiving the serialized data
     * @param state the state to serialize
     * @throws IOException
     */
    private void addStateToDataOutputStream(DataOutput dataOutput, T state) throws IOException {
        new Text(Strings.nullToEmpty(state.getId())).write(dataOutput);

    /**
     * Create a new znode with data if it does not exist; otherwise update the existing znode with data.
     * @param storeName storeName portion of znode path
     * @param tableName tableName portion of znode path
     * @param data znode data
     * @throws IOException
     */
    private void putData(String storeName, String tableName, byte[] data) throws IOException {
        String path = formPath(storeName, tableName);

        if (!propStore.exists(path, 0)) {
            // create with data
            if (!propStore.create(path, data, AccessOption.PERSISTENT)) {
                throw new IOException("Failed to create a state file for table " + tableName);
        } else {
            // Update
            propStore.set(path, data, AccessOption.PERSISTENT);

    public void put(String storeName, String tableName, T state) throws IOException {
        putAll(storeName, tableName, Collections.singletonList(state));

    public void putAll(String storeName, String tableName, Collection<T> states) throws IOException {
        try (ByteArrayOutputStream byteArrayOs = new ByteArrayOutputStream();
                OutputStream os = compressedValues ? new GZIPOutputStream(byteArrayOs) : byteArrayOs;
                DataOutputStream dataOutput = new DataOutputStream(os)) {

            for (T state : states) {
                addStateToDataOutputStream(dataOutput, state);

            putData(storeName, tableName, byteArrayOs.toByteArray());

    public T get(String storeName, String tableName, String stateId) throws IOException {
        String path = formPath(storeName, tableName);
        byte[] data = propStore.get(path, null, 0);
        List<T> states = Lists.newArrayList();

        deserialize(data, states, stateId);

        if (states.isEmpty()) {
            return null;
        } else {
            return states.get(0);

    /**
     * Retrieve states from the state store based on the store name and a filtering predicate.
     * @param storeName The store name enclosing the state files
     * @param predicate The predicate for state file filtering
     * @return list of states matching the predicate
     * @throws IOException
     */
    protected List<T> getAll(String storeName, Predicate<String> predicate) throws IOException {
        List<T> states = Lists.newArrayList();
        String path = formPath(storeName);
        byte[] data;

        List<String> children = propStore.getChildNames(path, 0);

        if (children == null) {
            return Collections.emptyList();

        for (String c : children) {
            if (predicate.apply(c)) {
                data = propStore.get(path + "/" + c, null, 0);
                deserialize(data, states);

        return states;

    public List<T> getAll(String storeName, String tableName) throws IOException {
        List<T> states = Lists.newArrayList();
        String path = formPath(storeName, tableName);
        byte[] data = propStore.get(path, null, 0);

        deserialize(data, states);

        return states;

    public List<T> getAll(String storeName) throws IOException {
        return getAll(storeName, Predicates.<String>alwaysTrue());

    public List<String> getTableNames(String storeName, Predicate<String> predicate) throws IOException {
        List<String> names = Lists.newArrayList();
        String path = formPath(storeName);

        List<String> children = propStore.getChildNames(path, 0);

        if (children != null) {
            for (String c : children) {
                if (predicate.apply(c)) {

        return names;

    public void createAlias(String storeName, String original, String alias) throws IOException {
        String pathOriginal = formPath(storeName, original);
        byte[] data;

        if (!propStore.exists(pathOriginal, 0)) {
            throw new IOException(String.format("State does not exist for table %s", original));

        data = propStore.get(pathOriginal, null, 0);

        putData(storeName, alias, data);

    public void delete(String storeName, String tableName) throws IOException {
        propStore.remove(formPath(storeName, tableName), 0);

    public void delete(String storeName) throws IOException {
        propStore.remove(formPath(storeName), 0);

    /**
     * Deserialize data into a list of {@link State}s.
     * @param data byte array
     * @param states output list of states
     * @param stateId optional key filter. Set to null for no filtering.
     * @throws IOException
     */
    private void deserialize(byte[] data, List<T> states, String stateId) throws IOException {
        if (data != null) {
            Text key = new Text();

            try (ByteArrayInputStream bais = new ByteArrayInputStream(data);
                    InputStream is = StreamUtils.isCompressed(data) ? new GZIPInputStream(bais) : bais;
                    DataInputStream dis = new DataInputStream(is)) {
                // keep deserializing while we have data
                while (dis.available() > 0) {
                    T state = this.stateClass.newInstance();


                    if (stateId != null && key.toString().equals(stateId)) {
            } catch (EOFException e) {
                // no more data. GZIPInputStream.available() doesn't return 0 until after EOF.
            } catch (RuntimeException e) {
                throw e;
            } catch (Exception e) {
                throw new IOException("failure deserializing state from ZkStateStore", e);

    /**
     * Deserialize data into a list of {@link State}s.
     * @param data byte array
     * @param states output list of states
     * @throws IOException
     */
    private void deserialize(byte[] data, List<T> states) throws IOException {
        deserialize(data, states, null);