Java tutorial
/* * Copyright 2017 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.hydrator.plugin.db.batch.action; import co.cask.cdap.api.annotation.Description; import co.cask.cdap.api.annotation.Macro; import co.cask.cdap.api.annotation.Name; import co.cask.cdap.api.annotation.Plugin; import co.cask.cdap.api.plugin.PluginConfig; import co.cask.cdap.etl.api.PipelineConfigurer; import co.cask.cdap.etl.api.action.Action; import co.cask.cdap.etl.api.action.ActionContext; import com.google.common.base.Preconditions; import com.google.common.base.Strings; import com.google.common.base.Throwables; import com.vertica.jdbc.VerticaConnection; import com.vertica.jdbc.VerticaCopyStream; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.FileNotFoundException; import java.sql.Connection; import java.sql.DatabaseMetaData; import java.sql.Driver; import java.sql.DriverManager; import java.sql.ResultSet; import java.sql.SQLException; import java.util.ArrayList; import java.util.List; import javax.annotation.Nullable; /** * Runs a query after a pipeline run. */ @Plugin(type = Action.PLUGIN_TYPE) @Name("VerticaBulkImportAction") @Description("Vertica bulk load plugin") public class VerticaBulkImportAction extends Action { private static final Logger LOG = LoggerFactory.getLogger(VerticaBulkImportAction.class); private final VerticaConfig config; public VerticaBulkImportAction(VerticaConfig config) { this.config = config; } @Override public void configurePipeline(PipelineConfigurer pipelineConfigurer) throws IllegalArgumentException { validateInputs(); } private void validateInputs() { Preconditions.checkArgument(!(config.user == null && config.password != null), "user is null. Please provide both user name and password if database requires " + "authentication. If not, please remove password and retry."); if (config.level.equalsIgnoreCase("basic")) { if (Strings.isNullOrEmpty(config.tableName)) { throw new IllegalArgumentException( "Table name must be provided in basic level for Vertica Bulk load"); } if (Strings.isNullOrEmpty(config.delimiter)) { throw new IllegalArgumentException( "Delimiter must be provided in basic level for Vertica Bulk load"); } } else { if (Strings.isNullOrEmpty(config.copyStatement)) { throw new IllegalArgumentException( "Copy statement can not be null or empty for Advanced level. Please check " + "copyStatement property"); } } } @Override public void run(ActionContext context) throws Exception { Object driver = Class.forName("com.vertica.jdbc.Driver").newInstance(); DriverManager.registerDriver((Driver) driver); Preconditions.checkArgument(tableExists(config.tableName), "Table %s does not exist. Please check that the 'tableName' property " + "has been set correctly, and that the connection string %s points to a valid database.", config.tableName, config.connectionString); String copyStatement; if (config.level.equalsIgnoreCase("basic")) { // COPY tableName FROM STDIN DELIMITER 'delimiter' copyStatement = String.format("COPY %s FROM STDIN DELIMITER '%s'", config.tableName, config.delimiter); } else { copyStatement = config.copyStatement; } LOG.debug("Copy statement is: {}", copyStatement); try { try (Connection connection = DriverManager.getConnection(config.connectionString, config.user, config.password)) { connection.setAutoCommit(false); // run Copy statement VerticaCopyStream stream = new VerticaCopyStream((VerticaConnection) connection, copyStatement); // Keep running count of the number of rejects int totalRejects = 0; // start() starts the stream process, and opens the COPY command. stream.start(); FileSystem fs = FileSystem.get(new Configuration()); List<String> fileList = new ArrayList<>(); FileStatus[] fileStatus; try { fileStatus = fs.listStatus(new Path(config.path)); for (FileStatus fileStat : fileStatus) { fileList.add(fileStat.getPath().toString()); } } catch (FileNotFoundException e) { throw new IllegalArgumentException(String.format(String.format( "Path %s not found on file system. Please provide correct path.", config.path), e)); } if (fileStatus.length <= 0) { LOG.warn("No files available to load into vertica database"); } for (String file : fileList) { Path path = new Path(file); FSDataInputStream inputStream = fs.open(path); // Add stream to the VerticaCopyStream stream.addStream(inputStream); // call execute() to load the newly added stream. You could // add many streams and call execute once to load them all. // Which method you choose depends mainly on whether you want // the ability to check the number of rejections as the load // progresses so you can stop if the number of rejects gets too // high. Also, high numbers of InputStreams could create a // resource issue on your client system. stream.execute(); // Show any rejects from this execution of the stream load // getRejects() returns a List containing the // row numbers of rejected rows. List<Long> rejects = stream.getRejects(); // The size of the list gives you the number of rejected rows. int numRejects = rejects.size(); totalRejects += numRejects; if (config.autoCommit.equalsIgnoreCase("true")) { // Commit the loaded data connection.commit(); } } // Finish closes the COPY command. It returns the number of // rows inserted. long results = stream.finish(); context.getMetrics().gauge("num.of.rows.rejected", totalRejects); context.getMetrics().gauge("num.of.rows.inserted", results); // Commit the loaded data connection.commit(); } } catch (Exception e) { throw new RuntimeException(String.format("Exception while running copy statement %s", copyStatement), e); } finally { DriverManager.deregisterDriver((Driver) driver); } } /** * Vertica config */ public class VerticaConfig extends PluginConfig { public static final String CONNECTION_STRING = "connectionString"; public static final String USER = "user"; public static final String PASSWORD = "password"; public static final String COPYSTATEMENT = "copyStatement"; public static final String PATH = "path"; public static final String LEVEL = "level"; public static final String TABLE = "tableName"; public static final String DELIMITER = "delimiter"; public static final String AUTO_COMMIT = "autoCommit"; @Name(CONNECTION_STRING) @Description("JDBC connection string including database name.") @Macro public String connectionString; @Name(USER) @Description("User to use to connect to the specified database. Required for databases that " + "need authentication. Optional for databases that do not require authentication.") @Nullable @Macro public String user; @Name(PASSWORD) @Description("Password to use to connect to the specified database. Required for databases that " + "need authentication. Optional for databases that do not require authentication.") @Nullable @Macro public String password; @Name(LEVEL) @Description("Copy statement query level. Basic automatically creates copy statement with tableName and delimiter. " + "To use more options please choose Advanced option.") @Macro public String level; @Name(TABLE) @Description("Name of the vertica table where data will be bulk loaded.") @Nullable @Macro public String tableName; @Name(DELIMITER) @Description("Delimiter in input files. Each delimited values will become columns in specified vertica table") @Nullable @Macro public String delimiter; @Name(COPYSTATEMENT) @Description("Copy statement to bulk load into vertica. This query must use the COPY statement to load data from " + "STDIN. Unlike copying from a file on the host, you do not need superuser privileges to copy a stream. All " + "your user account needs is INSERT privileges on the target table.") @Macro @Nullable public String copyStatement; @Name(PATH) @Description("File directory path from where all the file need to be loaded to vertica.") @Macro public String path; @Name(AUTO_COMMIT) @Description("Auto commit after every file? Or commit after all the files are loaded? If selected true, commit is" + " applied for every file.") public String autoCommit; public VerticaConfig(String connectionString, String user, String password, String level, String tableName, String delimiter, String copyStatement, String path) { this.connectionString = connectionString; this.user = user; this.password = password; this.level = level; this.tableName = tableName; this.delimiter = delimiter; this.copyStatement = copyStatement; this.path = path; } } public boolean tableExists(String tableName) { Connection connection; try { if (config.user == null) { connection = DriverManager.getConnection(config.connectionString); } else { connection = DriverManager.getConnection(config.connectionString, config.user, config.password); } try { DatabaseMetaData metadata = connection.getMetaData(); try (ResultSet rs = metadata.getTables(null, null, tableName, null)) { return rs.next(); } } finally { connection.close(); } } catch (SQLException e) { LOG.error("Exception while trying to check the existence of database table {} for connection {}.", tableName, config.connectionString, e); throw Throwables.propagate(e); } } }