/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.uber.hoodie.common.table.log;

import static com.uber.hoodie.common.util.SchemaTestUtil.getSimpleSchema;

import com.google.common.collect.Maps;
import com.uber.hoodie.common.minicluster.MiniClusterUtil;
import com.uber.hoodie.common.model.HoodieArchivedLogFile;
import com.uber.hoodie.common.table.log.HoodieLogFormat.Writer;
import com.uber.hoodie.common.table.log.block.HoodieAvroDataBlock;
import com.uber.hoodie.common.table.log.block.HoodieLogBlock;
import com.uber.hoodie.common.util.SchemaTestUtil;
import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.TimeoutException;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * This class intentionally sets up the MiniDFSCluster in a different way, without relying on
 * {@link MiniClusterUtil}, in order to reproduce the append() issue: https://issues.apache.org/jira/browse/HDFS-6325
 * Reference: https://issues.apache.org/jira/secure/attachment/12645053/HDFS-6325.patch
 */
public class HoodieLogFormatAppendFailureTest {

  private static File baseDir;
  private static MiniDFSCluster cluster;

  @BeforeClass
  public static void setUpClass() throws IOException {
    // NOTE: The MiniDFSCluster leaves behind the directory under which it was created
    baseDir = new File("/tmp/" + UUID.randomUUID().toString());
    FileUtil.fullyDelete(baseDir);
    // Append is not supported in LocalFileSystem, so HDFS needs to be set up.
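    // Lower the DataNode heartbeat interval and the NameNode's heartbeat re-check interval so
    // that killed DataNodes are recognized as dead quickly, and shorten the client socket
    // timeout so the failed append surfaces well within the test's 60s timeout.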
    Configuration conf = new Configuration();
    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, baseDir.getAbsolutePath());
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 1000);
    conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
    conf.setInt(DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY, 3000);
    cluster = new MiniDFSCluster.Builder(conf).checkExitOnShutdown(true).numDataNodes(4).build();
  }

  @AfterClass
  public static void tearDownClass() {
    cluster.shutdown(true);
    // Force-clean the directory under which the cluster was created
    FileUtil.fullyDelete(baseDir);
  }

  @Test(timeout = 60000)
  public void testFailedToGetAppendStreamFromHDFSNameNode()
      throws IOException, URISyntaxException, InterruptedException, TimeoutException {

    // Create a test path on HDFS; unlike LocalFileSystem, HDFS supports appends
    String uuid = UUID.randomUUID().toString();
    Path localPartitionPath = new Path("/tmp/");
    FileSystem fs = cluster.getFileSystem();
    Path testPath = new Path(localPartitionPath, uuid);
    fs.mkdirs(testPath);

    // Generate some records and append them as an Avro data block
    List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 10);
    Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
    header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
    HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header);
    Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(testPath)
        .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits.archive")
        .overBaseCommit("").withFs(fs).build();
    writer = writer.appendBlock(dataBlock);
    // Remember the current log file version, to compare against after the rollover
    int logFileVersion = writer.getLogFile().getLogVersion();
    Path logFilePath = writer.getLogFile().getPath();
    writer.close();

    // Wait for the file to reach 3x replication
    DFSTestUtil.waitReplication(fs, logFilePath, (short) 3);

    // Shut down every DataNode that holds a location of the file's last block
    LocatedBlocks lbs = cluster.getFileSystem().getClient().getNamenode()
        .getBlockLocations("/tmp/" + uuid + "/" + logFilePath.getName(), 0, Long.MAX_VALUE);
    List<DataNode> dnsOfCluster = cluster.getDataNodes();
    DatanodeInfo[] dnsWithLocations = lbs.getLastLocatedBlock().getLocations();
    for (DataNode dn : dnsOfCluster) {
      for (DatanodeInfo loc : dnsWithLocations) {
        if (dn.getDatanodeId().equals(loc)) {
          dn.shutdown();
          cluster.stopDataNode(dn.getDisplayName());
          DFSTestUtil.waitForDatanodeDeath(dn);
        }
      }
    }
    // Wait for the replication of this file to go down to 0
    DFSTestUtil.waitReplication(fs, logFilePath, (short) 0);

    // Opening a new Writer now will hit an IOException from the NameNode. The writer builder
    // should handle this, roll over the log file, and return a new writer with a bumped-up
    // logVersion.
    writer = HoodieLogFormat.newWriterBuilder().onParentPath(testPath)
        .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits.archive")
        .overBaseCommit("").withFs(fs).build();
    // The log version must differ for this new writer
    Assert.assertNotEquals(logFileVersion, writer.getLogFile().getLogVersion());
  }
}
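/*
 * The following is a minimal, hypothetical sketch (NOT Hudi's actual implementation) of the
 * rollover behavior the test above verifies: when the NameNode cannot hand out an append
 * stream for the current log file, the writer builder is expected to catch the IOException
 * and fall back to a fresh log file with a bumped-up version. The WriterFactory interface and
 * the openWithRollover() helper below are illustrative names that do not exist in the codebase.
 */
class AppendRolloverSketch {

  /** Hypothetical factory, standing in for HoodieLogFormat.newWriterBuilder()...build(). */
  interface WriterFactory {
    Writer open() throws IOException;
  }

  /**
   * Try to append to the current log file; if the append stream cannot be obtained (e.g. all
   * DataNodes holding its last block are dead), roll over to the next log file version instead
   * of failing the write.
   */
  static Writer openWithRollover(WriterFactory appendToCurrent, WriterFactory rolloverToNext)
      throws IOException {
    try {
      return appendToCurrent.open();
    } catch (IOException e) {
      // Append stream unavailable: roll over to a new log file version rather than fail
      return rolloverToNext.open();
    }
  }
}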