com.splicemachine.derby.stream.spark.SMOutputFormatTest.java Source code

Introduction

Here is the source code for com.splicemachine.derby.stream.spark.SMOutputFormatTest.java, a JUnit test class that checks whether the output committer's abort() is invoked when a Spark write job fails, and left uncalled when the job succeeds.

Source

/*
 * Copyright 2012 - 2016 Splice Machine, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use
 * this file except in compliance with the License. You may obtain a copy of the
 * License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 */

package com.splicemachine.derby.stream.spark;

import com.splicemachine.access.HConfiguration;
import com.splicemachine.db.iapi.error.StandardException;
import com.splicemachine.db.iapi.sql.execute.ExecRow;
import com.splicemachine.db.iapi.types.RowLocation;
import com.splicemachine.derby.impl.SpliceSpark;
import com.splicemachine.derby.stream.BaseStreamTest;
import com.splicemachine.derby.stream.spark.fake.FakeOutputFormat;
import com.splicemachine.derby.stream.spark.fake.FakeTableWriterBuilder;
import com.splicemachine.derby.stream.utils.TableWriterUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.spark.SparkException;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.function.PairFunction;
import org.junit.Assert;
import org.junit.Test;
import scala.Tuple2;
import scala.util.Either;

import java.io.*;

public class SMOutputFormatTest extends BaseStreamTest {

    @Test
    public void readExceptionsCauseAbort() throws StandardException, IOException {
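        // Every row is routed through FailFunction, which throws while the input is
        // being read. The save should fail with a SparkException, and the output
        // format is expected to record the resulting abort() call by creating a
        // file under the abort.directory configured below.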
        SparkPairDataSet<ExecRow, ExecRow> dataset = new SparkPairDataSet<>(
                SpliceSpark.getContext().parallelizePairs(tenRows).mapToPair(new FailFunction()));
        JavaPairRDD<ExecRow, Either<Exception, ExecRow>> rdd = dataset.wrapExceptions();

        final Configuration conf = new Configuration(HConfiguration.unwrapDelegate());
        TableWriterUtils.serializeInsertTableWriterBuilder(conf, new FakeTableWriterBuilder(false));
        conf.setClass(JobContext.OUTPUT_FORMAT_CLASS_ATTR, FakeOutputFormat.class, FakeOutputFormat.class);
        File file = File.createTempFile(SMOutputFormatTest.class.getName(), "exception");
        file.delete(); // swap the temp file for an empty directory to collect abort markers
        file.mkdir();
        conf.set("abort.directory", file.getAbsolutePath());
        try {
            rdd.saveAsNewAPIHadoopDataset(conf);
            Assert.fail("Expected exception");
        } catch (Exception se) {
            Assert.assertTrue("Unexpected exception", se instanceof SparkException);
        }
        File[] files = file.listFiles();
        Assert.assertTrue("Abort() not called", files.length > 0);
    }

    @Test
    public void writeExceptionsCauseAbort() throws StandardException, IOException {
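        // Here the rows map cleanly, but FakeTableWriterBuilder(true) presumably
        // configures the fake writer to fail on write; the job should again end in
        // a SparkException with the abort() recorded in abort.directory.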
        SparkPairDataSet<RowLocation, ExecRow> dataset = new SparkPairDataSet<>(
                SpliceSpark.getContext().parallelizePairs(tenRows).mapToPair(new ToRowLocationFunction()));
        JavaPairRDD<RowLocation, Either<Exception, ExecRow>> rdd = dataset.wrapExceptions();

        final Configuration conf = new Configuration(HConfiguration.unwrapDelegate());
        TableWriterUtils.serializeInsertTableWriterBuilder(conf, new FakeTableWriterBuilder(true));
        conf.setClass(JobContext.OUTPUT_FORMAT_CLASS_ATTR, FakeOutputFormat.class, FakeOutputFormat.class);
        File file = File.createTempFile(SMOutputFormatTest.class.getName(), "exception");
        file.delete();
        file.mkdir();
        conf.set("abort.directory", file.getAbsolutePath());
        try {
            rdd.saveAsNewAPIHadoopDataset(conf);
            Assert.fail("Expected exception");
        } catch (Exception se) {
            Assert.assertTrue("Unexpected exception", se instanceof SparkException);
        }
        File[] files = file.listFiles();
        Assert.assertTrue("Abort() not called", files.length > 0);
    }

    @Test
    public void abortNotCalled() throws StandardException, IOException {
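        // Happy path: the rows map cleanly and the writer succeeds, so the job
        // completes normally and abort.directory must remain empty.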
        SparkPairDataSet<RowLocation, ExecRow> dataset = new SparkPairDataSet<>(
                SpliceSpark.getContext().parallelizePairs(tenRows).mapToPair(new ToRowLocationFunction()));
        JavaPairRDD<RowLocation, Either<Exception, ExecRow>> rdd = dataset.wrapExceptions();

        final Configuration conf = new Configuration(HConfiguration.unwrapDelegate());
        TableWriterUtils.serializeInsertTableWriterBuilder(conf, new FakeTableWriterBuilder(false));
        conf.setClass(JobContext.OUTPUT_FORMAT_CLASS_ATTR, FakeOutputFormat.class, FakeOutputFormat.class);
        File file = File.createTempFile(SMOutputFormatTest.class.getName(), "noException");
        file.delete();
        file.mkdir();
        conf.set("abort.directory", file.getAbsolutePath());
        rdd.saveAsNewAPIHadoopDataset(conf);
        File[] files = file.listFiles();
        Assert.assertEquals("Abort() was called", 0, files.length);
    }

    public static class FailFunction
            implements Serializable, PairFunction<Tuple2<ExecRow, ExecRow>, ExecRow, ExecRow> {
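        // Throws on every input row, simulating a failure while reading the dataset.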
        @Override
        public Tuple2<ExecRow, ExecRow> call(Tuple2<ExecRow, ExecRow> execRowExecRowTuple2) throws Exception {
            throw new NullPointerException("fail");
        }
    }

    public static class ToRowLocationFunction
            implements Serializable, PairFunction<Tuple2<ExecRow, ExecRow>, RowLocation, ExecRow> {
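        // Re-keys each row by RowLocation; a null location is sufficient for these tests.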
        @Override
        public Tuple2<RowLocation, ExecRow> call(Tuple2<ExecRow, ExecRow> t) throws Exception {
            return new Tuple2<>(null, t._2());
        }
    }
}
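
Example

All three tests treat FakeOutputFormat as a black box: they only assert on whether a file appears in the directory passed via the abort.directory key. FakeOutputFormat itself is not shown on this page, so the class below is merely a minimal sketch of how an output format could record abort() calls that way; the class name MarkerOutputFormat and every method body are illustrative assumptions, not Splice Machine's actual implementation.

import java.io.File;
import java.io.IOException;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

/** Hypothetical output format that records abort() calls as marker files. */
public class MarkerOutputFormat extends OutputFormat<Object, Object> {

    @Override
    public RecordWriter<Object, Object> getRecordWriter(TaskAttemptContext context) {
        return new RecordWriter<Object, Object>() {
            @Override public void write(Object key, Object value) { /* discard rows */ }
            @Override public void close(TaskAttemptContext ctx) { }
        };
    }

    @Override
    public void checkOutputSpecs(JobContext context) { }

    @Override
    public OutputCommitter getOutputCommitter(TaskAttemptContext context) {
        return new OutputCommitter() {
            @Override public void setupJob(JobContext jobContext) { }
            @Override public void setupTask(TaskAttemptContext taskContext) { }
            @Override public boolean needsTaskCommit(TaskAttemptContext taskContext) { return true; }
            @Override public void commitTask(TaskAttemptContext taskContext) { }

            @Override
            public void abortTask(TaskAttemptContext taskContext) throws IOException {
                // Record the abort as a marker file under the configured directory;
                // this is the observable side effect the tests above assert on.
                File dir = new File(taskContext.getConfiguration().get("abort.directory"));
                File marker = new File(dir, "abort-" + taskContext.getTaskAttemptID());
                marker.createNewFile();
            }
        };
    }
}

Wired into a job via conf.setClass(JobContext.OUTPUT_FORMAT_CLASS_ATTR, ...), as the tests do above, abortTask() leaves behind one marker file per failed task attempt, which is exactly the condition the final assertions check with file.listFiles().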