org.apache.druid.storage.s3.S3DataSegmentFinder.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.druid.storage.s3.S3DataSegmentFinder.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.storage.s3;

import com.amazonaws.AmazonServiceException;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.model.S3ObjectInputStream;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Throwables;
import com.google.inject.Inject;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.segment.loading.DataSegmentFinder;
import org.apache.druid.segment.loading.SegmentLoadingException;
import org.apache.druid.timeline.DataSegment;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * {@link DataSegmentFinder} implementation that discovers segments stored in S3.
 *
 * <p>It lists the objects under a key prefix of the configured bucket, and for every object whose file name is
 * {@code descriptor.json} it deserializes the descriptor into a {@link DataSegment}. The {@code type}, {@code key}
 * and {@code bucket} entries of the segment's loadSpec are corrected in memory when they do not match the location
 * the segment was actually found at, and the corrected descriptor is optionally written back to S3.
 */
public class S3DataSegmentFinder implements DataSegmentFinder {
    private static final Logger log = new Logger(S3DataSegmentFinder.class);

    /** File name that identifies a segment descriptor object within a listing. */
    private static final String DESCRIPTOR_FILENAME = "descriptor.json";

    private final ServerSideEncryptingAmazonS3 s3Client;
    private final ObjectMapper jsonMapper;
    private final S3DataSegmentPusherConfig config;

    @Inject
    public S3DataSegmentFinder(ServerSideEncryptingAmazonS3 s3Client, S3DataSegmentPusherConfig config,
            ObjectMapper jsonMapper) {
        this.s3Client = s3Client;
        this.config = config;
        this.jsonMapper = jsonMapper;
    }

    /**
     * Scans S3 for segment descriptors and returns the segments they describe.
     *
     * @param workingDirPath   key prefix to list; when {@code null} or empty the configured base key is used instead
     * @param updateDescriptor when {@code true}, a descriptor whose loadSpec had to be corrected is re-serialized
     *                         and written back to the same key in S3
     * @return the segments collected through {@link DataSegmentFinder#putInMapRetainingNewest}
     * @throws SegmentLoadingException if a descriptor has no matching index.zip, on an
     *                                 {@link AmazonServiceException}, or on an {@link IOException} while reading or
     *                                 writing a descriptor
     */
    @Override
    public Set<DataSegment> findSegments(String workingDirPath, boolean updateDescriptor)
            throws SegmentLoadingException {
        final Map<String, Pair<DataSegment, Long>> timestampedSegments = new HashMap<>();

        try {
            // A null path is treated like an empty one rather than failing with a NullPointerException.
            final String prefix = workingDirPath == null || workingDirPath.isEmpty()
                    ? config.getBaseKey()
                    : workingDirPath;
            final Iterator<S3ObjectSummary> objectSummaryIterator = S3Utils.objectSummaryIterator(s3Client,
                    config.getBucket(), prefix, config.getMaxListingLength());

            while (objectSummaryIterator.hasNext()) {
                final String descriptorJson = objectSummaryIterator.next().getKey();
                if (!DESCRIPTOR_FILENAME.equals(S3Utils.toFilename(descriptorJson))) {
                    continue;
                }

                final String indexZip = S3Utils.indexZipForSegmentPath(descriptorJson);
                if (!S3Utils.isObjectInBucketIgnoringPermission(s3Client, config.getBucket(), indexZip)) {
                    throw new SegmentLoadingException(
                            "index.zip didn't exist at [%s] while descriptor.json exists!?", indexZip);
                }

                collectSegment(descriptorJson, indexZip, updateDescriptor, timestampedSegments);
            }
        } catch (AmazonServiceException e) {
            throw new SegmentLoadingException(e, "Problem interacting with S3");
        } catch (IOException e) {
            throw new SegmentLoadingException(e, "IO exception");
        } catch (Exception e) {
            // Lets the SegmentLoadingException thrown above reach the caller unwrapped; anything else is propagated.
            Throwables.propagateIfInstanceOf(e, SegmentLoadingException.class);
            Throwables.propagate(e);
        }
        return timestampedSegments.values().stream().map(x -> x.lhs).collect(Collectors.toSet());
    }

    /**
     * Reads one descriptor, fixes its loadSpec if needed, and records the segment in {@code timestampedSegments}.
     */
    private void collectSegment(String descriptorJson, String indexZip, boolean updateDescriptor,
            Map<String, Pair<DataSegment, Long>> timestampedSegments) throws IOException {
        try (S3Object descriptorObject = s3Client.getObject(config.getBucket(), descriptorJson);
                S3ObjectInputStream is = descriptorObject.getObjectContent()) {
            final ObjectMetadata objectMetadata = descriptorObject.getObjectMetadata();
            final DataSegment dataSegment = jsonMapper.readValue(is, DataSegment.class);
            log.info("Found segment [%s] located at [%s]", dataSegment.getIdentifier(), indexZip);

            final Map<String, Object> loadSpec = dataSegment.getLoadSpec();
            if (!isLoadSpecCurrent(loadSpec, indexZip)) {
                loadSpec.put("type", S3StorageDruidModule.SCHEME);
                loadSpec.put("key", indexZip);
                loadSpec.put("bucket", config.getBucket());
                if (updateDescriptor) {
                    // Log the index.zip key the loadSpec now points at (not the S3Object handle).
                    log.info("Updating loadSpec in descriptor.json at [%s] with new path [%s]",
                            descriptorJson, indexZip);
                    final byte[] updatedDescriptor = StringUtils
                            .toUtf8(jsonMapper.writeValueAsString(dataSegment));
                    // The metadata was fetched with the old descriptor, so its Content-Length describes the old
                    // payload; the re-serialized JSON usually has a different size.
                    objectMetadata.setContentLength(updatedDescriptor.length);
                    s3Client.putObject(config.getBucket(), descriptorJson,
                            new ByteArrayInputStream(updatedDescriptor), objectMetadata);
                }
            }

            // A missing last-modified time is recorded as 0.
            DataSegmentFinder.putInMapRetainingNewest(timestampedSegments, dataSegment,
                    objectMetadata.getLastModified() == null ? 0
                            : objectMetadata.getLastModified().getTime());
        }
    }

    /**
     * Returns whether the loadSpec already names the S3 scheme, the given index.zip key and the configured bucket.
     */
    private boolean isLoadSpecCurrent(Map<String, Object> loadSpec, String indexZip) {
        return S3StorageDruidModule.SCHEME.equals(loadSpec.get("type"))
                && indexZip.equals(loadSpec.get("key"))
                && config.getBucket().equals(loadSpec.get("bucket"));
    }
}