com.alibaba.jstorm.hdfs.transaction.RocksDbHdfsState.java Source code


Introduction

Here is the source code for com.alibaba.jstorm.hdfs.transaction.RocksDbHdfsState.java, a transactional key/value state that keeps its data in a local RocksDB instance and backs its checkpoints up to HDFS.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.alibaba.jstorm.hdfs.transaction;

import com.alibaba.jstorm.metric.JStormMetrics;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;

import org.apache.commons.io.FileUtils;
import org.apache.hadoop.fs.FileStatus;
import org.rocksdb.Checkpoint;
import org.rocksdb.ColumnFamilyOptions;
import org.rocksdb.DBOptions;
import org.rocksdb.FlushOptions;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.RocksIterator;
import org.rocksdb.TtlDB;
import org.rocksdb.WriteBatch;
import org.rocksdb.WriteOptions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.state.Serializer;
import backtype.storm.task.TopologyContext;
import backtype.storm.utils.Utils;

import com.alibaba.jstorm.cache.rocksdb.RocksDbOptionsFactory;
import com.alibaba.jstorm.client.ConfigExtension;
import com.alibaba.jstorm.cluster.Common;
import com.alibaba.jstorm.common.metric.AsmHistogram;
import com.alibaba.jstorm.hdfs.HdfsCache;
import com.alibaba.jstorm.metric.MetricClient;
import com.alibaba.jstorm.transactional.state.IRichCheckpointKvState;
import com.alibaba.jstorm.utils.JStormUtils;
import com.alibaba.jstorm.utils.SerializerFactory;

/**
 * 1. Backup checkpoint state from the local FS to the remote FS (HDFS)
 * 2. Restore checkpoint state from the remote FS to the local FS
 *
 * HDFS state structure: base_dir/topology_name/rocksDb/key_range/0001.sst
 *                                                                0002.sst
 *                                                                .......
 *                                                                checkpoint/batchId/sstFile.list
 *                                                                                   CURRENT
 *                                                                                   MANIFEST-*
 *
 * Local RocksDB structure: worker_dir/transactionState/rocksdb/key_range/rocksdb_files
 *                                                                        checkpoint/...
 *
 * Transaction State
 */
public class RocksDbHdfsState<K, V> implements IRichCheckpointKvState<K, V, String> {
    private static final long serialVersionUID = 7907988526099798193L;

    private static final Logger LOG = LoggerFactory.getLogger(RocksDbHdfsState.class);

    protected static final String ROCKSDB_DATA_FILE_EXT = "sst";
    protected static final String SST_FILE_LIST = "sstFile.list";
    protected static final String ENABLE_METRICS = "rocksdb.hdfs.state.metrics";

    protected String topologyName;
    protected Map conf;

    protected String stateName;

    protected HdfsCache hdfsCache;
    protected String hdfsDbDir;
    protected String hdfsCheckpointDir;

    protected RocksDB rocksDb;
    protected String rocksDbDir;
    protected String rocksDbCheckpointDir;
    protected Collection<String> lastCheckpointFiles;

    protected Serializer<Object> serializer;

    protected int ttlTimeSec;

    protected long lastCleanTime;
    protected long cleanPeriod;

    protected long lastSuccessBatchId;

    protected MetricClient metricClient;
    protected AsmHistogram hdfsWriteLatency;
    protected AsmHistogram hdfsDeleteLatency;
    protected AsmHistogram rocksDbFlushAndCpLatency;

    public RocksDbHdfsState() {

    }

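    /**
     * Resolves the topology name, registers HDFS/RocksDB latency histograms,
     * creates the serializer and then sets up the remote and local state
     * directories via {@link #initEnv(String, Map, String)}.
     */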
    @Override
    public void init(TopologyContext context) {
        try {
            this.topologyName = Common.topologyIdToName(context.getTopologyId());
        } catch (InvalidTopologyException e) {
            LOG.error("Failed get topology name by id-{}", context.getTopologyId());
            throw new RuntimeException(e.getMessage());
        }
        String workerDir = context.getWorkerIdDir();
        metricClient = new MetricClient(context);
        hdfsWriteLatency = metricClient.registerHistogram("HDFS write latency");
        hdfsDeleteLatency = metricClient.registerHistogram("HDFS delete latency");
        rocksDbFlushAndCpLatency = metricClient.registerHistogram("RocksDB flush and checkpoint latency");
        cleanPeriod = ConfigExtension.getTransactionBatchSnapshotTimeout(context.getStormConf()) * 5 * 1000;
        serializer = SerializerFactory.createSerailzer(context.getStormConf());
        initEnv(topologyName, context.getStormConf(), workerDir);
    }

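    /**
     * Creates the HDFS data and checkpoint directories for this state if they
     * do not exist yet, resets the local RocksDB directories and opens the
     * local RocksDB instance.
     */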
    public void initEnv(String topologyName, Map conf, String workerDir) {
        this.conf = conf;
        // init hdfs env
        this.hdfsCache = new HdfsCache(conf);
        this.hdfsDbDir = hdfsCache.getBaseDir() + "/" + topologyName + "/rocksDb/" + stateName;
        this.hdfsCheckpointDir = hdfsDbDir + "/checkpoint";
        try {
            if (!hdfsCache.exist(hdfsDbDir))
                hdfsCache.mkdir(hdfsDbDir);
            if (!hdfsCache.exist(hdfsCheckpointDir))
                hdfsCache.mkdir(hdfsCheckpointDir);
        } catch (IOException e) {
            LOG.error("Failed to create hdfs dir for path=" + hdfsCheckpointDir, e);
            throw new RuntimeException(e.getMessage());
        }

        // init local rocksdb env
        this.rocksDbDir = workerDir + "/transactionState/rocksdb/" + stateName;
        this.rocksDbCheckpointDir = rocksDbDir + "/checkpoint";
        initLocalRocksDbDir();
        initRocksDb();

        LOG.info("HDFS: dataDir={}, checkpointDir={}", hdfsDbDir, hdfsCheckpointDir);
        LOG.info("Local: dataDir={}, checkpointDir={}", rocksDbDir, rocksDbCheckpointDir);
    }

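    /**
     * Opens the local RocksDB instance, applying a user-defined
     * {@link RocksDbOptionsFactory} when one is configured. If a state TTL is
     * set, the database is opened as a {@link TtlDB}. Also resets the
     * checkpoint bookkeeping (last checkpoint files, clean time, batch id).
     */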
    protected void initRocksDb() {
        RocksDbOptionsFactory optionFactory = new RocksDbOptionsFactory.Defaults();
        Options options = optionFactory.createOptions(null);
        DBOptions dbOptions = optionFactory.createDbOptions(null);
        ColumnFamilyOptions cfOptions = optionFactory.createColumnFamilyOptions(null);
        String optionsFactoryClass = (String) conf.get(ConfigExtension.ROCKSDB_OPTIONS_FACTORY_CLASS);
        if (optionsFactoryClass != null) {
            RocksDbOptionsFactory udfOptionFactory = (RocksDbOptionsFactory) Utils.newInstance(optionsFactoryClass);
            options = udfOptionFactory.createOptions(options);
            dbOptions = udfOptionFactory.createDbOptions(dbOptions);
            cfOptions = udfOptionFactory.createColumnFamilyOptions(cfOptions);
        }

        try {
            ttlTimeSec = ConfigExtension.getStateTtlTime(conf);
            if (ttlTimeSec > 0)
                rocksDb = TtlDB.open(options, rocksDbDir, ttlTimeSec, false);
            else
                rocksDb = RocksDB.open(options, rocksDbDir);
            // trigger an initial full-range compaction
            rocksDb.compactRange();
            LOG.info("Finish the initialization of RocksDB");
        } catch (RocksDBException e) {
            LOG.error("Failed to open rocksdb located at " + rocksDbDir, e);
            throw new RuntimeException(e.getMessage());
        }

        lastCheckpointFiles = new HashSet<String>();
        lastCleanTime = System.currentTimeMillis();
        lastSuccessBatchId = -1;
    }

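    /**
     * Cleans any leftover local RocksDB data and recreates the local
     * checkpoint directory.
     */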
    private void initLocalRocksDbDir() {
        try {
            File file = new File(rocksDbDir);
            if (file.exists())
                FileUtils.cleanDirectory(file);
            FileUtils.forceMkdir(new File(rocksDbCheckpointDir));
        } catch (IOException e) {
            LOG.error("Failed to create dir for path=" + rocksDbCheckpointDir, e);
            throw new RuntimeException(e.getMessage());
        }
    }

    @Override
    public void setStateName(String stateName) {
        this.stateName = stateName;
    }

    @Override
    public void put(K key, V value) {
        try {
            rocksDb.put(serializer.serialize(key), serializer.serialize(value));
        } catch (RocksDBException e) {
            LOG.error("Failed to put data, key={}, value={}", key, value);
            throw new RuntimeException(e.getMessage());
        }
    }

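    /**
     * Writes all entries of the batch atomically through a single RocksDB
     * {@link WriteBatch}.
     */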
    @Override
    public void putBatch(Map<K, V> batch) {
        try {
            WriteBatch writeBatch = new WriteBatch();
            for (Map.Entry<K, V> entry : batch.entrySet()) {
                writeBatch.put(serializer.serialize(entry.getKey()), serializer.serialize(entry.getValue()));
            }
            rocksDb.write(new WriteOptions(), writeBatch);
        } catch (RocksDBException e) {
            LOG.error("Failed to put batch={}", batch);
            throw new RuntimeException(e.getMessage());
        }
    }

    @Override
    public V get(K key) {
        try {
            V ret = null;
            if (key != null) {
                byte[] rawKey = serializer.serialize(key);
                byte[] rawData = rocksDb.get(rawKey);
                ret = rawData != null ? (V) serializer.deserialize(rawData) : null;
            }
            return ret;
        } catch (RocksDBException e) {
            LOG.error("Failed to get value by key-{}", key);
            throw new RuntimeException(e.getMessage());
        }
    }

    @Override
    public Map<K, V> getBatch(Collection<K> keys) {
        Map<K, V> batch = new HashMap<K, V>();
        for (K key : keys) {
            V value = get(key);
            if (value != null)
                batch.put(key, value);
        }
        return batch;
    }

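    /**
     * Scans the whole database and returns every deserialized key/value pair.
     */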
    @Override
    public Map<K, V> getBatch() {
        Map<K, V> batch = new HashMap<K, V>();
        RocksIterator itr = rocksDb.newIterator();
        itr.seekToFirst();
        while (itr.isValid()) {
            byte[] rawKey = itr.key();
            byte[] rawValue = itr.value();
            V value = rawValue != null ? (V) serializer.deserialize(rawValue) : null;
            batch.put((K) serializer.deserialize(rawKey), value);
            itr.next();
        }
        return batch;
    }

    @Override
    public Collection<K> getAllKeys() {
        Collection<K> keys = new ArrayList<K>();
        RocksIterator itr = rocksDb.newIterator();
        itr.seekToFirst();
        while (itr.isValid()) {
            keys.add((K) serializer.deserialize(itr.key()));
            itr.next();
        }
        return keys;
    }

    @Override
    public void remove(K key) {
        try {
            rocksDb.remove(serializer.serialize(key));
        } catch (RocksDBException e) {
            LOG.warn("Failed to remove " + key, e);
        }
    }

    @Override
    public void clear() {
        for (K key : getAllKeys()) {
            remove(key);
        }
    }

    @Override
    public void cleanup() {
        if (rocksDb != null)
            rocksDb.dispose();
        if (hdfsCache != null)
            hdfsCache.close();
    }

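    /**
     * Restores state from a remote checkpoint: disposes the current RocksDB
     * instance, wipes the local directory, downloads the checkpoint metadata
     * (sstFile.list, CURRENT, MANIFEST-*) and the sst files it references from
     * HDFS, then reopens RocksDB.
     */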
    @Override
    public void restore(String checkpointBackupDir) {
        LOG.info("Start restore from remote: {}", checkpointBackupDir);

        if (rocksDb != null)
            rocksDb.dispose();
        initLocalRocksDbDir();
        // Restore db files from hdfs to local disk
        try {
            if (checkpointBackupDir != null) {
                // Get dir of sst files
                int index = checkpointBackupDir.lastIndexOf("checkpoint");
                String remoteDbBackupDir = checkpointBackupDir.substring(0, index);

                // copy sstFile.list, CURRENT, MANIFEST to local disk for the specified batch
                Collection<String> files = hdfsCache.listFile(checkpointBackupDir, false);
                LOG.debug("Restore checkpoint files: {}", files);
                for (String fileName : files)
                    hdfsCache.copyToLocal(checkpointBackupDir + "/" + fileName, rocksDbDir);

                // copy all rocksDB sst files to local disk
                String sstFileList = rocksDbDir + "/" + SST_FILE_LIST;
                File file = new File(sstFileList);
                List<String> sstFiles = FileUtils.readLines(file);
                LOG.debug("Restore sst files: {}", sstFiles);
                for (String sstFile : sstFiles) {
                    hdfsCache.copyToLocal(remoteDbBackupDir + "/" + sstFile, rocksDbDir);
                }
                FileUtils.deleteQuietly(file);
            }
            initRocksDb();
        } catch (IOException e) {
            LOG.error("Failed to restore checkpoint", e);
            throw new RuntimeException(e.getMessage());
        }
    }

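    /**
     * Incrementally uploads the local checkpoint of the given batch to HDFS:
     * sst files already uploaded for the previous checkpoint are skipped, then
     * sstFile.list and the remaining checkpoint files (CURRENT, MANIFEST-*)
     * are copied into the remote checkpoint directory.
     *
     * @return the remote checkpoint directory of this batch
     */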
    @Override
    public String backup(long batchId) {
        try {
            String hdfsCpDir = getRemoteCheckpointPath(batchId);
            String batchCpPath = getLocalCheckpointPath(batchId);
            long startTime = System.currentTimeMillis();

            // upload sst data files to hdfs
            Collection<File> sstFiles = FileUtils.listFiles(new File(batchCpPath),
                    new String[] { ROCKSDB_DATA_FILE_EXT }, false);
            for (File sstFile : sstFiles) {
                if (!lastCheckpointFiles.contains(sstFile.getName())) {
                    hdfsCache.copyToDfs(batchCpPath + "/" + sstFile.getName(), hdfsDbDir, true);
                }
            }

            // upload sstFile.list, CURRENT, MANIFEST to hdfs
            Collection<String> sstFileList = getFileList(sstFiles);
            File cpFileList = new File(batchCpPath + "/" + SST_FILE_LIST);
            FileUtils.writeLines(cpFileList, sstFileList);

            if (hdfsCache.exist(hdfsCpDir))
                hdfsCache.remove(hdfsCpDir, true);

            hdfsCache.mkdir(hdfsCpDir);
            Collection<File> allFiles = FileUtils.listFiles(new File(batchCpPath), null, false);
            allFiles.removeAll(sstFiles);
            Collection<File> nonSstFiles = allFiles;
            for (File nonSstFile : nonSstFiles) {
                hdfsCache.copyToDfs(batchCpPath + "/" + nonSstFile.getName(), hdfsCpDir, true);
            }

            if (JStormMetrics.enabled)
                hdfsWriteLatency.update(System.currentTimeMillis() - startTime);
            lastCheckpointFiles = sstFileList;
            return hdfsCpDir;
        } catch (IOException e) {
            LOG.error("Failed to upload checkpoint", e);
            throw new RuntimeException(e.getMessage());
        }
    }

    /**
     * Flush the data in memtable of RocksDB into disk, and then create checkpoint
     * 
     * @param batchId
     */
    @Override
    public void checkpoint(long batchId) {
        long startTime = System.currentTimeMillis();
        try {
            rocksDb.flush(new FlushOptions());
            Checkpoint cp = Checkpoint.create(rocksDb);
            cp.createCheckpoint(getLocalCheckpointPath(batchId));
        } catch (RocksDBException e) {
            LOG.error("Failed to create checkpoint for batch-" + batchId, e);
            throw new RuntimeException(e.getMessage());
        }

        if (JStormMetrics.enabled)
            rocksDbFlushAndCpLatency.update(System.currentTimeMillis() - startTime);
    }

    /**
     * remove obsolete checkpoint data at local disk and remote backup storage
     * 
     * @param batchId id of success batch
     */
    @Override
    public void remove(long batchId) {
        removeObsoleteLocalCheckpoints(batchId);
        removeObsoleteRemoteCheckpoints(batchId);
    }

    private String getLocalCheckpointPath(long batchId) {
        return rocksDbCheckpointDir + "/" + batchId;
    }

    private String getRemoteCheckpointPath(long batchId) {
        return hdfsCheckpointDir + "/" + batchId;
    }

    private String getRemoteCheckpointSstListFile(long batchId) {
        return getRemoteCheckpointPath(batchId) + "/" + SST_FILE_LIST;
    }

    private Collection<String> getFileList(Collection<File> files) {
        Collection<String> ret = new HashSet<String>();
        for (File file : files)
            ret.add(file.getName());
        return ret;
    }

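    /**
     * Deletes local checkpoint directories older than the given successful
     * batch; stray entries that cannot be parsed as a batch id are removed
     * once they are more than one hour old.
     */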
    private void removeObsoleteLocalCheckpoints(long successBatchId) {
        File cpRootDir = new File(rocksDbCheckpointDir);
        for (String cpDir : cpRootDir.list()) {
            try {
                long cpId = JStormUtils.parseLong(cpDir);
                if (cpId < successBatchId)
                    FileUtils.deleteQuietly(new File(rocksDbCheckpointDir + "/" + cpDir));
            } catch (Throwable e) {
                File file = new File(rocksDbCheckpointDir + "/" + cpDir);
                // If it has existed for more than one hour, remove the unexpected file
                if (System.currentTimeMillis() - file.lastModified() > 60 * 60 * 1000) {
                    LOG.debug("Unexpected file-" + cpDir + " in local checkpoint dir, " + rocksDbCheckpointDir, e);
                    FileUtils.deleteQuietly(file);
                }
            }
        }
    }

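    /**
     * Removes the sst files and the checkpoint directory of the previous batch
     * from HDFS. In addition, every cleanPeriod a full sweep deletes any remote
     * file that is older than the current successful checkpoint and is not
     * referenced by it.
     */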
    private void removeObsoleteRemoteCheckpoints(long successBatchId) {
        long startTime = System.currentTimeMillis();
        if (lastSuccessBatchId != -1 && lastSuccessBatchId != (successBatchId - 1)) {
            LOG.warn("Some ack msgs from TM were lost!. Last success batch Id: {}, Current success batch Id: {}",
                    lastSuccessBatchId, successBatchId);
        }
        lastSuccessBatchId = successBatchId;
        long obsoleteBatchId = successBatchId - 1;
        try {
            Collection<String> lastSuccessSStFiles = hdfsCache
                    .readLines(getRemoteCheckpointSstListFile(successBatchId));
            if (hdfsCache.exist(getRemoteCheckpointPath(obsoleteBatchId))) {
                // remove obsolete sst files
                Collection<String> obsoleteSstFiles = hdfsCache
                        .readLines(getRemoteCheckpointSstListFile(obsoleteBatchId));
                obsoleteSstFiles.removeAll(lastSuccessSStFiles);
                for (String sstFile : obsoleteSstFiles) {
                    hdfsCache.remove(hdfsDbDir + "/" + sstFile, false);
                }

                // remove checkpoint dir
                hdfsCache.remove(getRemoteCheckpointPath(obsoleteBatchId), true);
            }

            // Sometimes, if a remove fails, some checkpoint files are left behind in the remote FS.
            // So we periodically check whether a full clean is required; if so, all expired checkpoint files are removed.
            long currentTime = System.currentTimeMillis();
            if (currentTime - lastCleanTime > cleanPeriod) {
                FileStatus successCpFileStatus = hdfsCache
                        .getFileStatus(getRemoteCheckpointSstListFile(successBatchId));
                // remove obsolete sst files
                FileStatus[] fileStatuses = hdfsCache.listFileStatus(hdfsDbDir);
                for (FileStatus fileStatus : fileStatuses) {
                    String fileName = fileStatus.getPath().getName();
                    if (fileStatus.getModificationTime() < successCpFileStatus.getModificationTime()
                            && !lastSuccessSStFiles.contains(fileName)) {
                        hdfsCache.remove(hdfsDbDir + "/" + fileName, true);
                    }
                }

                // remove obsolete checkpoint dir
                fileStatuses = hdfsCache.listFileStatus(hdfsCheckpointDir);
                for (FileStatus fileStatus : fileStatuses) {
                    String checkpointId = fileStatus.getPath().getName();
                    if (fileStatus.getModificationTime() < successCpFileStatus.getModificationTime()
                            && Integer.valueOf(checkpointId) != successBatchId) {
                        hdfsCache.remove(hdfsCheckpointDir + "/" + checkpointId, true);
                    }
                }

                lastCleanTime = currentTime;
            }
        } catch (IOException e) {
            LOG.error("Failed to remove obsolete checkpoint data for batch-" + obsoleteBatchId, e);
        }

        if (JStormMetrics.enabled)
            this.hdfsDeleteLatency.update(System.currentTimeMillis() - startTime);
    }

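    /**
     * Standalone smoke test: loads the config file passed as args[0], writes
     * and checkpoints a number of batches, backs each one up to HDFS, then
     * restores from the last remote checkpoint and prints the recovered values.
     */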
    public static void main(String[] args) {
        Map conf = new HashMap<Object, Object>();
        conf.putAll(Utils.loadConf(args[0]));
        RocksDbHdfsState<String, Integer> state = new RocksDbHdfsState<String, Integer>();
        state.setStateName(String.valueOf(1));

        // setup checkpoint
        int batchNum = JStormUtils.parseInt(conf.get("batch.num"), 100);
        state.initEnv("test", conf, "/tmp/rocksdb_test");
        String remoteCpPath = null;
        for (int i = 0; i < batchNum; i++) {
            state.put(String.valueOf(i % 20), i);
            state.checkpoint(i);
            remoteCpPath = state.backup(i);
            state.remove(i);
        }
        state.cleanup();

        state.initEnv("test", conf, "/tmp/rocksdb_test");
        state.restore(remoteCpPath);
        for (int i = 0; i < 20; i++) {
            Integer value = state.get(String.valueOf(i));
            LOG.info("key={}, value={}", String.valueOf(i), value);
        }
        state.cleanup();
    }
}