Source code for com.cloudera.cdk.maven.plugins.AbstractDatasetMojo.java

Java tutorial

Introduction

Here is the source code for com.cloudera.cdk.maven.plugins.AbstractDatasetMojo.java

Source

/**
 * Copyright 2013 Cloudera Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.cloudera.cdk.maven.plugins;

import com.cloudera.cdk.data.DatasetDescriptor;
import com.cloudera.cdk.data.DatasetRepositories;
import com.cloudera.cdk.data.DatasetRepository;
import com.cloudera.cdk.data.filesystem.FileSystemDatasetRepository;
import com.cloudera.cdk.data.hcatalog.HCatalogDatasetRepository;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.io.Resources;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.maven.artifact.DependencyResolutionRequiredException;
import org.apache.maven.plugin.MojoExecutionException;
import org.apache.maven.plugins.annotations.Parameter;

/**
 * Base class for mojos that operate on a dataset repository. Resolves the
 * target {@link DatasetRepository} from the configured plugin parameters and
 * provides the schema-configuration logic shared by the concrete dataset
 * mojos.
 */
abstract class AbstractDatasetMojo extends AbstractHadoopMojo {

    /**
     * The root directory of the dataset repository. Optional if using HCatalog for metadata storage.
     */
    @Parameter(property = "cdk.rootDirectory")
    protected String rootDirectory;

    /**
     * If true, store dataset metadata in HCatalog, otherwise store it on the filesystem.
     */
    @Parameter(property = "cdk.hcatalog")
    protected boolean hcatalog = true;

    /**
     * The URI specifying the dataset repository, e.g. <i>repo:hdfs://host:8020/data</i>.
     * Optional, but if specified then <code>cdk.rootDirectory</code> and
     * <code>cdk.hcatalog</code> are ignored.
     */
    @Parameter(property = "cdk.repositoryUri")
    protected String repositoryUri;

    /**
     * Hadoop configuration properties. Optional; when not configured, Maven
     * leaves this field null and an empty configuration is used.
     */
    @Parameter(property = "cdk.hadoopConfiguration")
    private Properties hadoopConfiguration;

    @VisibleForTesting
    FileSystemDatasetRepository.Builder fsRepoBuilder = new FileSystemDatasetRepository.Builder();

    @VisibleForTesting
    HCatalogDatasetRepository.Builder hcatRepoBuilder = new HCatalogDatasetRepository.Builder();

    /**
     * Builds a Hadoop {@link Configuration} from the optional
     * <code>cdk.hadoopConfiguration</code> properties. Default Hadoop
     * resources (core-site.xml etc.) are deliberately not loaded
     * (<code>new Configuration(false)</code>).
     */
    private Configuration getConf() {
        Configuration conf = new Configuration(false);
        // The parameter is optional; Maven injects null when it is absent from
        // the plugin configuration, so guard against an NPE here.
        if (hadoopConfiguration != null) {
            for (String key : hadoopConfiguration.stringPropertyNames()) {
                conf.set(key, hadoopConfiguration.getProperty(key));
            }
        }
        return conf;
    }

    /**
     * Opens the dataset repository selected by the mojo parameters.
     * <code>cdk.repositoryUri</code> takes precedence; otherwise an HCatalog
     * or filesystem repository is built from <code>cdk.hcatalog</code> and
     * <code>cdk.rootDirectory</code>.
     *
     * @return the resolved dataset repository
     * @throws IllegalArgumentException if no root directory is given for a
     *     filesystem repository, or the root directory is not a valid URI
     */
    DatasetRepository getDatasetRepository() {
        if (repositoryUri != null) {
            return DatasetRepositories.open(repositoryUri);
        }
        if (!hcatalog && rootDirectory == null) {
            throw new IllegalArgumentException(
                    "Root directory must be specified if not using HCatalog.");
        }
        if (rootDirectory != null) {
            try {
                URI uri = new URI(rootDirectory);
                if (hcatalog) {
                    hcatRepoBuilder.rootDirectory(uri);
                } else {
                    fsRepoBuilder.rootDirectory(uri);
                }
            } catch (URISyntaxException e) {
                throw new IllegalArgumentException(e);
            }
        }
        if (hcatalog) {
            return hcatRepoBuilder.configuration(getConf()).build();
        }
        return fsRepoBuilder.configuration(getConf()).build();
    }

    /**
     * Configures the Avro schema on a descriptor builder, either from a schema
     * file (looked up on the local filesystem first, then as a classpath
     * resource) or by loading a class from the project's compile classpath and
     * deriving the schema from it by reflection.
     *
     * @param descriptorBuilder the descriptor builder to configure
     * @param avroSchemaFile path or classpath resource name of an Avro schema
     *     file, or null
     * @param avroSchemaReflectClass fully-qualified name of the class to
     *     derive the schema from, or null; ignored when a schema file is given
     * @throws MojoExecutionException if the schema file cannot be read, the
     *     class cannot be loaded, or the compile classpath cannot be resolved
     */
    void configureSchema(DatasetDescriptor.Builder descriptorBuilder, String avroSchemaFile,
            String avroSchemaReflectClass) throws MojoExecutionException {
        if (avroSchemaFile != null) {
            File avroSchema = new File(avroSchemaFile);
            try {
                if (avroSchema.exists()) {
                    descriptorBuilder.schema(avroSchema);
                } else {
                    // Fall back to a classpath resource. Close the stream
                    // ourselves: the builder is not documented to close it, and
                    // leaving it open leaks a handle on every invocation.
                    InputStream in = Resources.getResource(avroSchemaFile).openStream();
                    try {
                        descriptorBuilder.schema(in);
                    } finally {
                        in.close();
                    }
                }
            } catch (IOException e) {
                throw new MojoExecutionException("Problem while reading file " + avroSchemaFile, e);
            }
        } else if (avroSchemaReflectClass != null) {
            try {
                // Build a classloader over the project's compile classpath so
                // classes compiled by this build are visible for reflection.
                List<URL> classpath = new ArrayList<URL>();
                for (Object element : mavenProject.getCompileClasspathElements()) {
                    String path = (String) element;
                    classpath.add(new File(path).toURI().toURL());
                }
                ClassLoader parentClassLoader = getClass().getClassLoader(); // use Maven's classloader, not the system one
                ClassLoader classLoader = new URLClassLoader(classpath.toArray(new URL[classpath.size()]),
                        parentClassLoader);

                descriptorBuilder.schema(Class.forName(avroSchemaReflectClass, true, classLoader));
            } catch (ClassNotFoundException e) {
                throw new MojoExecutionException("Problem finding class " + avroSchemaReflectClass, e);
            } catch (MalformedURLException e) {
                throw new MojoExecutionException("Problem finding class " + avroSchemaReflectClass, e);
            } catch (DependencyResolutionRequiredException e) {
                throw new MojoExecutionException("Problem finding class " + avroSchemaReflectClass, e);
            }
        }
    }
}