co.cask.cdap.etl.batch.ETLBatchTestBase.java Source code


Introduction

Here is the source code for co.cask.cdap.etl.batch.ETLBatchTestBase.java, a base test class that registers the ETL batch application artifact and plugins and provides a helper for reading Avro output back from a TimePartitionedFileSet.

Source

/*
 * Copyright © 2015-2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.etl.batch;

import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.dataset.lib.TimePartitionedFileSet;
import co.cask.cdap.etl.mock.test.HydratorTestBase;
import co.cask.cdap.proto.artifact.ArtifactSummary;
import co.cask.cdap.proto.id.ArtifactId;
import co.cask.cdap.proto.id.NamespaceId;
import co.cask.cdap.test.TestConfiguration;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.twill.filesystem.Location;
import org.junit.BeforeClass;
import org.junit.ClassRule;

import java.io.IOException;
import java.util.List;

/**
 * Base test class that sets up plugins and the batch template.
 */
public class ETLBatchTestBase extends HydratorTestBase {
    protected static final ArtifactId APP_ARTIFACT_ID = new ArtifactId(NamespaceId.DEFAULT.getNamespace(), "app",
            "1.0.0");
    protected static final ArtifactSummary APP_ARTIFACT = new ArtifactSummary("app", "1.0.0");
    private static int startCount = 0;

    @ClassRule
    public static final TestConfiguration CONFIG = new TestConfiguration("explore.enabled", false);

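    /**
     * Registers the batch application artifacts once per test run; the startCount guard keeps
     * repeated invocations from multiple test classes from re-registering them.
     */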
    @BeforeClass
    public static void setupTest() throws Exception {
        if (startCount++ > 0) {
            return;
        }
        setupBatchArtifacts(APP_ARTIFACT_ID, ETLBatchApplication.class);
    }

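    /**
     * Reads every Avro file under the given {@link TimePartitionedFileSet} and returns the
     * contained records as {@link GenericRecord}s, using the provided CDAP {@link Schema}
     * converted to its Avro equivalent.
     */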
    protected List<GenericRecord> readOutput(TimePartitionedFileSet fileSet, Schema schema) throws IOException {
        org.apache.avro.Schema avroSchema = new org.apache.avro.Schema.Parser().parse(schema.toString());
        DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(avroSchema);
        List<GenericRecord> records = Lists.newArrayList();
        for (Location dayLoc : fileSet.getEmbeddedFileSet().getBaseLocation().list()) {
            // this level should be the day (ex: 2015-01-19)
            for (Location timeLoc : dayLoc.list()) {
                // this level should be the time (ex: 21-23.1234567890000)
                for (Location file : timeLoc.list()) {
                    // this level should be the actual mapred output
                    String locName = file.getName();

                    if (locName.endsWith(".avro")) {
                        try (DataFileStream<GenericRecord> fileStream =
                                 new DataFileStream<>(file.getInputStream(), datumReader)) {
                            Iterables.addAll(records, fileStream);
                        }
                    }
                }
            }
        }
        return records;
    }
}
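
Example usage

ETLBatchTestBase is a base class rather than a runnable test on its own. A subclass would typically deploy and run an ETL batch pipeline, then call readOutput to verify the Avro records written to a TimePartitionedFileSet sink. The sketch below is illustrative only: the test class name, the "outputFileset" dataset name, and the single-field record schema are assumptions, and the pipeline deployment step is elided.

package co.cask.cdap.etl.batch;

import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.dataset.lib.TimePartitionedFileSet;
import co.cask.cdap.test.DataSetManager;
import org.apache.avro.generic.GenericRecord;
import org.junit.Assert;
import org.junit.Test;

import java.util.List;

public class ExampleETLBatchTest extends ETLBatchTestBase {

    @Test
    public void testReadOutput() throws Exception {
        // Hypothetical schema for the records the pipeline is expected to write.
        Schema schema = Schema.recordOf(
                "record",
                Schema.Field.of("name", Schema.of(Schema.Type.STRING)));

        // ... deploy and run an ETL batch pipeline that writes Avro files
        // to a TimePartitionedFileSet named "outputFileset" ...

        // Read back whatever the pipeline wrote and assert on it.
        DataSetManager<TimePartitionedFileSet> fileSetManager = getDataset("outputFileset");
        List<GenericRecord> records = readOutput(fileSetManager.get(), schema);
        Assert.assertFalse(records.isEmpty());
    }
}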