Java tutorial
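The listing below is the complete co.cask.cdap.etl.tool.UpgradeTool class. It is a command-line tool that upgrades Hydrator pipelines built against 3.2.x versions of the system cdap-etl-batch and cdap-etl-realtime artifacts into pipelines compatible with the 3.3.x artifacts, either by connecting to a running CDAP instance or by converting a saved pipeline configuration file (the -f/-o options). Configs that fail to upgrade can optionally be written to an error directory (-e) for manual fixing.

Before reading the source, here is a minimal sketch of a typical invocation, not part of the tool itself. It simply calls the tool's main method with the same arguments that would be passed on the command line; the URI, namespace, and error directory values are placeholders, and the cdap-etl-tool jar and its dependencies are assumed to be on the classpath.

// Hypothetical driver class for illustration only; the argument values are placeholders.
public class UpgradeToolExample {
  public static void main(String[] args) throws Exception {
    co.cask.cdap.etl.tool.UpgradeTool.main(new String[] {
      "upgrade",                       // the single required command
      "-u", "http://localhost:10000",  // CDAP instance URI (the tool's default)
      "-n", "default",                 // restrict the upgrade to one namespace
      "-e", "/tmp/upgrade-errors"      // placeholder directory for configs that fail to upgrade
    });
    // Passing "-f old-pipeline.json -o upgraded.json" instead would convert a saved
    // pipeline configuration file rather than updating deployed applications.
  }
}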
/*
 * Copyright 2015 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.etl.tool;

import co.cask.cdap.api.artifact.ArtifactScope;
import co.cask.cdap.api.artifact.ArtifactVersion;
import co.cask.cdap.client.ApplicationClient;
import co.cask.cdap.client.ArtifactClient;
import co.cask.cdap.client.NamespaceClient;
import co.cask.cdap.client.config.ClientConfig;
import co.cask.cdap.client.config.ConnectionConfig;
import co.cask.cdap.etl.proto.UpgradeContext;
import co.cask.cdap.etl.proto.UpgradeableConfig;
import co.cask.cdap.etl.proto.v2.ETLBatchConfig;
import co.cask.cdap.etl.proto.v2.ETLConfig;
import co.cask.cdap.etl.proto.v2.ETLRealtimeConfig;
import co.cask.cdap.etl.tool.config.BatchClientBasedUpgradeContext;
import co.cask.cdap.etl.tool.config.RealtimeClientBasedUpgradeContext;
import co.cask.cdap.proto.ApplicationDetail;
import co.cask.cdap.proto.ApplicationRecord;
import co.cask.cdap.proto.Id;
import co.cask.cdap.proto.NamespaceMeta;
import co.cask.cdap.proto.artifact.AppRequest;
import co.cask.cdap.proto.artifact.ArtifactSummary;
import co.cask.cdap.security.authentication.client.AccessToken;
import com.google.common.collect.ImmutableSet;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonObject;
import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.HashSet;
import java.util.Set;
import javax.annotation.Nullable;

/**
 * Used to upgrade pipelines created with older versions of the ETL artifacts to the current version.
 */
public class UpgradeTool {
  private static final Logger LOG = LoggerFactory.getLogger(UpgradeTool.class);
  private static final String BATCH_NAME = "cdap-etl-batch";
  private static final String REALTIME_NAME = "cdap-etl-realtime";
  private static final Gson GSON = new GsonBuilder().setPrettyPrinting().create();
  private static final int DEFAULT_READ_TIMEOUT_MILLIS = 90 * 1000;

  private final NamespaceClient namespaceClient;
  private final ApplicationClient appClient;
  private final ArtifactSummary batchArtifact;
  private final ArtifactSummary realtimeArtifact;
  private final ArtifactClient artifactClient;
  @Nullable
  private final File errorDir;

  private UpgradeTool(ClientConfig clientConfig, @Nullable File errorDir) {
    String version = ETLVersion.getVersion();
    this.batchArtifact = new ArtifactSummary(BATCH_NAME, version, ArtifactScope.SYSTEM);
    this.realtimeArtifact = new ArtifactSummary(REALTIME_NAME, version, ArtifactScope.SYSTEM);
    this.appClient = new ApplicationClient(clientConfig);
    this.namespaceClient = new NamespaceClient(clientConfig);
    this.artifactClient = new ArtifactClient(clientConfig);
    this.errorDir = errorDir;
  }

  private Set<Id.Application> upgrade() throws Exception {
    Set<Id.Application> upgraded = new HashSet<>();
    for (NamespaceMeta namespaceMeta : namespaceClient.list()) {
      Id.Namespace namespace = Id.Namespace.from(namespaceMeta.getName());
      upgraded.addAll(upgrade(namespace));
    }
    return upgraded;
  }

  private Set<Id.Application> upgrade(Id.Namespace namespace) throws Exception {
    Set<Id.Application> upgraded = new HashSet<>();
    Set<String> artifactNames = ImmutableSet.of(BATCH_NAME, REALTIME_NAME);
    for (ApplicationRecord appRecord : appClient.list(namespace, artifactNames, null)) {
      Id.Application appId = Id.Application.from(namespace, appRecord.getName());
      if (upgrade(appId)) {
        upgraded.add(appId);
      }
    }
    return upgraded;
  }

  private boolean upgrade(Id.Application appId) throws Exception {
    ApplicationDetail appDetail = appClient.get(appId);
    if (!shouldUpgrade(appDetail.getArtifact())) {
      return false;
    }

    LOG.info("Upgrading pipeline: {}", appId);
    String artifactName = appDetail.getArtifact().getName();
    String artifactVersion = appDetail.getVersion();
    if (BATCH_NAME.equals(artifactName)) {
      UpgradeContext upgradeContext = new BatchClientBasedUpgradeContext(appId.getNamespace(), artifactClient);
      ETLBatchConfig upgradedConfig =
        convertBatchConfig(artifactVersion, appDetail.getConfiguration(), upgradeContext);
      upgrade(appId, batchArtifact, upgradedConfig);
    } else if (REALTIME_NAME.equals(artifactName)) {
      UpgradeContext upgradeContext = new RealtimeClientBasedUpgradeContext(appId.getNamespace(), artifactClient);
      ETLRealtimeConfig upgradedConfig =
        convertRealtimeConfig(artifactVersion, appDetail.getConfiguration(), upgradeContext);
      upgrade(appId, realtimeArtifact, upgradedConfig);
    } else {
      // should never happen
      LOG.warn("Unknown app artifact {}. Skipping pipeline.", artifactName);
      return false;
    }
    return true;
  }

  private <T extends ETLConfig> void upgrade(Id.Application appId, ArtifactSummary appArtifact,
                                             T config) throws IOException {
    AppRequest<T> updateRequest = new AppRequest<>(appArtifact, config);
    try {
      appClient.update(appId, updateRequest);
    } catch (Exception e) {
      LOG.error("Error upgrading pipeline {}.", appId, e);
      if (errorDir != null) {
        File errorFile = new File(errorDir, String.format("%s-%s.json", appId.getNamespaceId(), appId.getId()));
        LOG.error("Writing config for pipeline {} to {} for further manual investigation.",
                  appId, errorFile.getAbsolutePath());
        try (OutputStreamWriter outputStreamWriter =
               new OutputStreamWriter(new FileOutputStream(errorFile), StandardCharsets.UTF_8)) {
          outputStreamWriter.write(GSON.toJson(config));
        }
      }
    }
  }

  public static void main(String[] args) throws Exception {

    Options options = new Options()
      .addOption(new Option("h", "help", false, "Print this usage message."))
      .addOption(new Option("u", "uri", true, "CDAP instance URI to interact with in the format " +
        "[http[s]://]<hostname>:<port>. Defaults to localhost:10000."))
      .addOption(new Option("a", "accesstoken", true, "File containing the access token to use when interacting " +
        "with a secure CDAP instance."))
      .addOption(new Option("t", "timeout", true, "Timeout in milliseconds to use when interacting with the " +
        "CDAP RESTful APIs. Defaults to " + DEFAULT_READ_TIMEOUT_MILLIS + "."))
      .addOption(new Option("n", "namespace", true, "Namespace to perform the upgrade in. If none is given, " +
        "pipelines in all namespaces will be upgraded."))
      .addOption(new Option("p", "pipeline", true, "Name of the pipeline to upgrade. If specified, a namespace " +
        "must also be given."))
      .addOption(new Option("f", "configfile", true, "File containing old application details to update. " +
        "The file contents are expected to be in the same format as the request body for creating an " +
        "ETL application from one of the etl artifacts. " +
        "It is expected to be a JSON Object containing 'artifact' and 'config' fields. " +
        "The value for 'artifact' must be a JSON Object that specifies the artifact scope, name, and version. " +
        "The value for 'config' must be a JSON Object that specifies the source, transforms, and sinks of the " +
        "pipeline, as expected by older versions of the etl artifacts."))
      .addOption(new Option("o", "outputfile", true, "File to write the converted application details provided in " +
        "the configfile option. If none is given, results will be written to the input file + '.new'. " +
        "The contents of this file can be sent directly to CDAP to update or create an application."))
      .addOption(new Option("e", "errorDir", true, "Optional directory to write any upgraded pipeline configs that " +
        "failed to upgrade. The problematic configs can then be manually edited and upgraded separately. " +
        "Upgrade errors may happen for pipelines that use plugins that are not backwards compatible. " +
        "This directory must be writable by the user that is running this tool."));

    CommandLineParser parser = new BasicParser();
    CommandLine commandLine = parser.parse(options, args);
    String[] commandArgs = commandLine.getArgs();

    // if help is an option, or if there isn't a single 'upgrade' command, print usage and exit.
    if (commandLine.hasOption("h") || commandArgs.length != 1 || !"upgrade".equalsIgnoreCase(commandArgs[0])) {
      HelpFormatter helpFormatter = new HelpFormatter();
      helpFormatter.printHelp(
        UpgradeTool.class.getName() + " upgrade",
        "Upgrades Hydrator pipelines created for 3.2.x versions " +
          "of the cdap-etl-batch and cdap-etl-realtime artifacts into pipelines compatible with 3.3.x versions of " +
          "cdap-etl-batch and cdap-etl-realtime. Connects to an instance of CDAP to find any 3.2.x pipelines, then " +
          "upgrades those pipelines.",
        options, "");
      System.exit(0);
    }

    ClientConfig clientConfig = getClientConfig(commandLine);

    if (commandLine.hasOption("f")) {
      String inputFilePath = commandLine.getOptionValue("f");
      String outputFilePath = commandLine.hasOption("o") ? commandLine.getOptionValue("o") : inputFilePath + ".new";
      convertFile(inputFilePath, outputFilePath, new ArtifactClient(clientConfig));
      System.exit(0);
    }

    File errorDir = commandLine.hasOption("e") ? new File(commandLine.getOptionValue("e")) : null;
    if (errorDir != null) {
      if (!errorDir.exists()) {
        if (!errorDir.mkdirs()) {
          LOG.error("Unable to create error directory {}.", errorDir.getAbsolutePath());
          System.exit(1);
        }
      } else if (!errorDir.isDirectory()) {
        LOG.error("{} is not a directory.", errorDir.getAbsolutePath());
        System.exit(1);
      } else if (!errorDir.canWrite()) {
        LOG.error("Unable to write to error directory {}.", errorDir.getAbsolutePath());
        System.exit(1);
      }
    }

    UpgradeTool upgradeTool = new UpgradeTool(clientConfig, errorDir);
    String namespace = commandLine.getOptionValue("n");
    String pipelineName = commandLine.getOptionValue("p");
    if (pipelineName != null) {
      if (namespace == null) {
        throw new IllegalArgumentException("Must specify a namespace when specifying a pipeline.");
      }
      Id.Application appId = Id.Application.from(namespace, pipelineName);
      if (upgradeTool.upgrade(appId)) {
        LOG.info("Successfully upgraded {}.", appId);
      } else {
        LOG.info("{} did not need to be upgraded.", appId);
      }
      System.exit(0);
    }

    if (namespace != null) {
      printUpgraded(upgradeTool.upgrade(Id.Namespace.from(namespace)));
      System.exit(0);
    }

    printUpgraded(upgradeTool.upgrade());
  }

  private static void printUpgraded(Set<Id.Application> pipelines) {
    if (pipelines.size() == 0) {
      LOG.info("Did not find any pipelines that needed upgrading.");
      return;
    }
    LOG.info("Successfully upgraded {} pipelines:", pipelines.size());
    for (Id.Application pipeline : pipelines) {
      LOG.info(" {}", pipeline);
    }
  }

  private static ClientConfig getClientConfig(CommandLine commandLine) throws IOException {
    String uriStr = commandLine.hasOption("u") ? commandLine.getOptionValue("u") : "localhost:10000";
    if (!uriStr.contains("://")) {
      uriStr = "http://" + uriStr;
    }
    URI uri = URI.create(uriStr);
    String hostname = uri.getHost();
    int port = uri.getPort();
    boolean sslEnabled = "https".equals(uri.getScheme());
    ConnectionConfig connectionConfig = ConnectionConfig.builder()
      .setHostname(hostname)
      .setPort(port)
      .setSSLEnabled(sslEnabled)
      .build();

    int readTimeout = commandLine.hasOption("t") ?
      Integer.parseInt(commandLine.getOptionValue("t")) : DEFAULT_READ_TIMEOUT_MILLIS;
    ClientConfig.Builder clientConfigBuilder = ClientConfig.builder()
      .setDefaultReadTimeout(readTimeout)
      .setConnectionConfig(connectionConfig);

    if (commandLine.hasOption("a")) {
      String tokenFilePath = commandLine.getOptionValue("a");
      File tokenFile = new File(tokenFilePath);
      if (!tokenFile.exists()) {
        throw new IllegalArgumentException("Access token file " + tokenFilePath + " does not exist.");
      }
      if (!tokenFile.isFile()) {
        throw new IllegalArgumentException("Access token file " + tokenFilePath + " is not a file.");
      }
      String tokenValue = new String(Files.readAllBytes(tokenFile.toPath()), StandardCharsets.UTF_8).trim();
      AccessToken accessToken = new AccessToken(tokenValue, 82000L, "Bearer");
      clientConfigBuilder.setAccessToken(accessToken);
    }

    return clientConfigBuilder.build();
  }

  private static void convertFile(String configFilePath, String outputFilePath,
                                  ArtifactClient artifactClient) throws Exception {
    File configFile = new File(configFilePath);
    if (!configFile.exists()) {
      throw new IllegalArgumentException(configFilePath + " does not exist.");
    }
    if (!configFile.isFile()) {
      throw new IllegalArgumentException(configFilePath + " is not a file.");
    }

    String fileContents = new String(Files.readAllBytes(configFile.toPath()), StandardCharsets.UTF_8);
    ETLAppRequest artifactFile = GSON.fromJson(fileContents, ETLAppRequest.class);
    if (!shouldUpgrade(artifactFile.artifact)) {
      throw new IllegalArgumentException("Cannot update for artifact " + artifactFile.artifact + ". " +
        "Please check that the artifact is cdap-etl-batch or cdap-etl-realtime in the system scope " +
        "of version 3.2.x.");
    }

    String version = ETLVersion.getVersion();
    File outputFile = new File(outputFilePath);
    String oldArtifactVersion = artifactFile.artifact.getVersion();
    if (BATCH_NAME.equals(artifactFile.artifact.getName())) {
      ArtifactSummary artifact = new ArtifactSummary(BATCH_NAME, version, ArtifactScope.SYSTEM);
      UpgradeContext upgradeContext = new BatchClientBasedUpgradeContext(Id.Namespace.DEFAULT, artifactClient);
      ETLBatchConfig config = convertBatchConfig(oldArtifactVersion, artifactFile.config.toString(), upgradeContext);
      AppRequest<ETLBatchConfig> updated = new AppRequest<>(artifact, config);
      try (BufferedWriter writer = Files.newBufferedWriter(outputFile.toPath(), StandardCharsets.UTF_8)) {
        writer.write(GSON.toJson(updated));
      }
    } else {
      ArtifactSummary artifact = new ArtifactSummary(REALTIME_NAME, version, ArtifactScope.SYSTEM);
      UpgradeContext upgradeContext = new RealtimeClientBasedUpgradeContext(Id.Namespace.DEFAULT, artifactClient);
      ETLRealtimeConfig config =
        convertRealtimeConfig(oldArtifactVersion, artifactFile.config.toString(), upgradeContext);
      AppRequest<ETLRealtimeConfig> updated = new AppRequest<>(artifact, config);
      try (BufferedWriter writer = Files.newBufferedWriter(outputFile.toPath(), StandardCharsets.UTF_8)) {
        writer.write(GSON.toJson(updated));
      }
    }

    LOG.info("Successfully converted application details from file " + configFilePath + ". " +
      "Results have been written to " + outputFilePath);
  }

  // class used to deserialize the old pipeline requests
  @SuppressWarnings("unused")
  private static class ETLAppRequest {
    private ArtifactSummary artifact;
    private JsonObject config;
  }

  private static boolean shouldUpgrade(ArtifactSummary artifactSummary) {
    // check that it is one of the system ETL artifacts
    if (artifactSummary.getScope() != ArtifactScope.SYSTEM) {
      return false;
    }
    if (!BATCH_NAME.equals(artifactSummary.getName()) && !REALTIME_NAME.equals(artifactSummary.getName())) {
      return false;
    }

    // check that it is a 3.2.x or 3.3.x version of the ETL artifacts
    ArtifactVersion artifactVersion = new ArtifactVersion(artifactSummary.getVersion());
    Integer majorVersion = artifactVersion.getMajor();
    Integer minorVersion = artifactVersion.getMinor();
    return majorVersion != null && majorVersion == 3 && minorVersion != null && minorVersion >= 2 && minorVersion < 4;
  }

  private static ETLBatchConfig convertBatchConfig(String artifactVersion, String configStr,
                                                   UpgradeContext upgradeContext) {
    UpgradeableConfig config;
    if (artifactVersion.startsWith("3.2.")) {
      config = GSON.fromJson(configStr, co.cask.cdap.etl.proto.v0.ETLBatchConfig.class);
    } else if (artifactVersion.startsWith("3.3.")) {
      config = GSON.fromJson(configStr, co.cask.cdap.etl.proto.v1.ETLBatchConfig.class);
    } else {
      LOG.warn("Unknown artifact version {}. Skipping pipeline.", artifactVersion);
      // should never happen
      return null;
    }

    while (config.canUpgrade()) {
      config = config.upgrade(upgradeContext);
    }
    return (ETLBatchConfig) config;
  }

  private static ETLRealtimeConfig convertRealtimeConfig(String artifactVersion, String configStr,
                                                         UpgradeContext upgradeContext) {
    UpgradeableConfig config;
    if (artifactVersion.startsWith("3.2.")) {
      config = GSON.fromJson(configStr, co.cask.cdap.etl.proto.v0.ETLRealtimeConfig.class);
    } else if (artifactVersion.startsWith("3.3.")) {
      config = GSON.fromJson(configStr, co.cask.cdap.etl.proto.v1.ETLRealtimeConfig.class);
    } else {
      LOG.warn("Unknown artifact version {}. Skipping pipeline.", artifactVersion);
      // should never happen
      return null;
    }

    while (config.canUpgrade()) {
      config = config.upgrade(upgradeContext);
    }
    return (ETLRealtimeConfig) config;
  }
}