Java tutorial
/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.presto.kinesis.s3config; import com.amazonaws.AmazonClientException; import com.amazonaws.AmazonServiceException; import com.amazonaws.services.s3.AmazonS3Client; import com.amazonaws.services.s3.AmazonS3URI; import com.amazonaws.services.s3.model.GetObjectRequest; import com.amazonaws.services.s3.model.ListObjectsRequest; import com.amazonaws.services.s3.model.ObjectListing; import com.amazonaws.services.s3.model.S3Object; import com.amazonaws.services.s3.model.S3ObjectSummary; import com.facebook.presto.kinesis.ConnectorShutdown; import com.facebook.presto.kinesis.KinesisClientProvider; import com.facebook.presto.kinesis.KinesisConnectorConfig; import com.facebook.presto.kinesis.KinesisStreamDescription; import com.facebook.presto.spi.SchemaTableName; import com.google.common.collect.ImmutableMap; import io.airlift.json.JsonCodec; import io.airlift.log.Logger; import com.google.inject.Inject; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.util.Collection; import java.util.HashMap; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Map; import static java.util.Objects.requireNonNull; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.ReentrantReadWriteLock; /** * Utility class to retrieve table definitions from a common place on Amazon S3. * * This is so that we can add new tables in a central "metastore" location without * having to update every single node with the files. * * This makes calls to Amazon AWS using the S3 client. */ public class S3TableConfigClient implements ConnectorShutdown { private static final Logger log = Logger.get(S3TableConfigClient.class); private static final KinesisStreamDescription dummyStreamDesc = new KinesisStreamDescription("__DUMMY__", "__DUMMY__", "__DUMMY__", null); public final KinesisConnectorConfig kinesisConnectorConfig; private final KinesisClientProvider clientManager; private final JsonCodec<KinesisStreamDescription> streamDescriptionCodec; private final String bucketUrl; private long lastCheck = 0; private ScheduledFuture<?> updateTaskHandle = null; private HashMap<String, KinesisStreamDescription> internalMap = new HashMap<String, KinesisStreamDescription>(); private ReentrantReadWriteLock internalMapLock = new ReentrantReadWriteLock(); @Inject public S3TableConfigClient(KinesisConnectorConfig aConnectorConfig, KinesisClientProvider aClientManager, JsonCodec<KinesisStreamDescription> jsonCodec) { this.kinesisConnectorConfig = requireNonNull(aConnectorConfig, "connector configuration object is null"); this.clientManager = requireNonNull(aClientManager, "client manager object is null"); this.streamDescriptionCodec = requireNonNull(jsonCodec, "JSON codec object is null"); // If using S3 start thread that periodically looks for updates this.bucketUrl = this.kinesisConnectorConfig.getTableDescriptionsS3(); if (!this.bucketUrl.isEmpty()) { startS3Updates(); } } /** Indicates this class is being used and actively reading table definitions from S3. */ public boolean isUsingS3() { return !this.bucketUrl.isEmpty(); } /** * Main entry point to get table definitions from S3 using bucket and object directory * given in the configuration. * * For safety, an immutable copy built from the internal map is returned. If multiple table * definitions for the same schema/table exist in the internal map, then the most recently * created one takes precedence (and a warning is logged). * * @return */ public Map<SchemaTableName, KinesisStreamDescription> getTablesFromS3() { HashMap<SchemaTableName, KinesisStreamDescription> intermediateMap = new HashMap<SchemaTableName, KinesisStreamDescription>(); internalMapLock.readLock().lock(); try { Collection<KinesisStreamDescription> streamValues = this.internalMap.values(); for (KinesisStreamDescription stream : streamValues) { SchemaTableName schemaTable = new SchemaTableName(stream.getSchemaName(), stream.getTableName()); KinesisStreamDescription currentValue = intermediateMap.get(schemaTable); if (currentValue == null || stream.getCreationTimestamp() >= currentValue.getCreationTimestamp()) { intermediateMap.put(new SchemaTableName(stream.getSchemaName(), stream.getTableName()), stream); } if (currentValue != null) { log.warn( "Note: duplicate definitions found for table %s.%s - only most recent definition used.", stream.getSchemaName(), stream.getTableName()); } } } finally { internalMapLock.readLock().unlock(); } return ImmutableMap.copyOf(intermediateMap); } /** Shutdown any periodic update jobs. */ @Override public void shutdown() { if (isUsingS3() && updateTaskHandle != null) { updateTaskHandle.cancel(true); } return; } protected void startS3Updates() { if (updateTaskHandle == null) { ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor(); this.updateTaskHandle = scheduler.scheduleAtFixedRate(() -> updateTablesFromS3(), 5, 600, TimeUnit.SECONDS); log.info("Periodic read of S3 location for table definitions has started."); } return; } /** * Call S3 to get the most recent object list. * * This is an object list request to AWS in the given "directory". * * @return */ protected List<S3ObjectSummary> getObjectSummaries() { AmazonS3Client s3client = this.clientManager.getS3Client(); AmazonS3URI directoryURI = new AmazonS3URI(this.bucketUrl); ArrayList<S3ObjectSummary> returnList = new ArrayList<S3ObjectSummary>(); try { log.info("Getting the listing of objects in the S3 table config directory: bucket %s prefix %s :", directoryURI.getBucket(), directoryURI.getKey()); ListObjectsRequest req = new ListObjectsRequest().withBucketName(directoryURI.getBucket()) .withPrefix(directoryURI.getKey() + "/").withDelimiter("/").withMaxKeys(25); ObjectListing result; do { result = s3client.listObjects(req); returnList.addAll(result.getObjectSummaries()); req.setMarker(result.getNextMarker()); } while (result.isTruncated()); log.info("Completed getting S3 object listing."); } catch (AmazonServiceException ase) { StringBuilder sb = new StringBuilder(); sb.append("Caught an AmazonServiceException, which means your request made it "); sb.append("to Amazon S3, but was rejected with an error response for some reason.\n"); sb.append("Error Message: " + ase.getMessage()); sb.append("HTTP Status Code: " + ase.getStatusCode()); sb.append("AWS Error Code: " + ase.getErrorCode()); sb.append("Error Type: " + ase.getErrorType()); sb.append("Request ID: " + ase.getRequestId()); log.error(sb.toString(), ase); } catch (AmazonClientException ace) { StringBuilder sb = new StringBuilder(); sb.append("Caught an AmazonClientException, " + "which means the client encountered " + "an internal error while trying to communicate" + " with S3, " + "such as not being able to access the network."); sb.append("Error Message: " + ace.getMessage()); log.error(sb.toString(), ace); } return returnList; } /** * Connect to S3 directory to look for new or updated table definitions and then * update the map. */ protected void updateTablesFromS3() { long now = System.currentTimeMillis(); List<S3ObjectSummary> objectList = this.getObjectSummaries(); AmazonS3Client s3client = this.clientManager.getS3Client(); AmazonS3URI directoryURI = new AmazonS3URI(this.bucketUrl); // Build map of "deltas" which in the end contains new definitions and deleted tables HashMap<String, KinesisStreamDescription> deltasMap = new HashMap<String, KinesisStreamDescription>(); internalMapLock.readLock().lock(); try { Iterator<String> keysIter = this.internalMap.keySet().iterator(); while (keysIter.hasNext()) { deltasMap.put(keysIter.next(), dummyStreamDesc); } } finally { internalMapLock.readLock().unlock(); } for (S3ObjectSummary objInfo : objectList) { if (!deltasMap.containsKey(objInfo.getKey()) || objInfo.getLastModified().getTime() >= this.lastCheck) { // New or updated file, so we must read from AWS try { if (objInfo.getKey().endsWith("/")) { continue; } log.info("Getting : %s - %s", objInfo.getBucketName(), objInfo.getKey()); S3Object object = s3client .getObject(new GetObjectRequest(objInfo.getBucketName(), objInfo.getKey())); StringBuilder resultStr = new StringBuilder(""); try (BufferedReader reader = new BufferedReader( new InputStreamReader(object.getObjectContent()))) { boolean hasMore = true; while (hasMore) { String line = reader.readLine(); if (line != null) { resultStr.append(line); } else { hasMore = false; } } KinesisStreamDescription table = streamDescriptionCodec.fromJson(resultStr.toString()); deltasMap.put(objInfo.getKey(), table); log.info("Put table description into the map from %s : %s.%s", objInfo.getKey(), table.getSchemaName(), table.getTableName()); } catch (IOException iox) { log.error("Problem reading input stream from object.", iox); } catch (IllegalArgumentException iax) { // Note: this gets thrown by airlift json library when the input is malformed. log.error("Invalid JSON table description.", iax); } } catch (AmazonServiceException ase) { StringBuilder sb = new StringBuilder(); sb.append("Caught an AmazonServiceException, which means your request made it "); sb.append("to Amazon S3, but was rejected with an error response for some reason.\n"); sb.append("Error Message: " + ase.getMessage()); sb.append("HTTP Status Code: " + ase.getStatusCode()); sb.append("AWS Error Code: " + ase.getErrorCode()); sb.append("Error Type: " + ase.getErrorType()); sb.append("Request ID: " + ase.getRequestId()); log.error(sb.toString(), ase); } catch (AmazonClientException ace) { StringBuilder sb = new StringBuilder(); sb.append("Caught an AmazonClientException, " + "which means the client encountered " + "an internal error while trying to communicate" + " with S3, " + "such as not being able to access the network."); sb.append("Error Message: " + ace.getMessage()); log.error(sb.toString(), ace); } } else if (deltasMap.containsKey(objInfo.getKey())) { deltasMap.remove(objInfo.getKey()); } } // end loop through object descriptions // Deltas: key pointing to dummy means delete, key pointing to other object means update. // This approach lets us delete and update while shortening the locked critical section. Iterator<Map.Entry<String, KinesisStreamDescription>> deltasIter = deltasMap.entrySet().iterator(); internalMapLock.writeLock().lock(); try { while (deltasIter.hasNext()) { Map.Entry<String, KinesisStreamDescription> entry = deltasIter.next(); if (entry.getValue().getTableName().equals("__DUMMY__")) { this.internalMap.remove(entry.getKey()); } else { this.internalMap.put(entry.getKey(), entry.getValue()); } } } finally { internalMapLock.writeLock().unlock(); } log.info("Completed updating table definitions from S3."); this.lastCheck = now; return; } }