com.metamx.druid.loading.S3DataSegmentPuller.java — source code

Java tutorial

Introduction

Below is the source code for com.metamx.druid.loading.S3DataSegmentPuller.java.

Source

/*
 * Druid - a distributed column store.
 * Copyright (C) 2012  Metamarkets Group Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */

package com.metamx.druid.loading;

import com.google.common.base.Throwables;
import com.google.common.io.ByteStreams;
import com.google.common.io.Closeables;
import com.google.common.io.Files;
import com.google.inject.Inject;
import com.metamx.common.ISE;
import com.metamx.common.MapUtils;
import com.metamx.common.logger.Logger;
import com.metamx.druid.client.DataSegment;
import com.metamx.druid.common.s3.S3Utils;
import com.metamx.druid.utils.CompressionUtils;
import org.apache.commons.io.FileUtils;
import org.jets3t.service.ServiceException;
import org.jets3t.service.impl.rest.httpclient.RestS3Service;
import org.jets3t.service.model.S3Object;
import org.jets3t.service.model.StorageObject;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.zip.GZIPInputStream;

/**
 * A {@link DataSegmentPuller} that fetches segment files from S3 (via jets3t) and
 * decompresses them into a local directory. The S3 location is read from the segment's
 * loadSpec map ("bucket" and "key" entries). Supported payload formats, chosen by key
 * suffix: ".zip" (unzipped into outDir), ".gz" (gunzipped to a single file), and
 * anything else (copied verbatim).
 */
public class S3DataSegmentPuller implements DataSegmentPuller {
    private static final Logger log = new Logger(S3DataSegmentPuller.class);

    // Keys into the segment loadSpec map identifying the S3 location.
    private static final String BUCKET = "bucket";
    private static final String KEY = "key";

    private final RestS3Service s3Client;

    @Inject
    public S3DataSegmentPuller(RestS3Service s3Client) {
        this.s3Client = s3Client;
    }

    /**
     * Downloads the segment's S3 object and decompresses it into {@code outDir},
     * retrying transient S3 failures via {@link S3Utils#retryS3Operation}.
     *
     * @param segment segment whose loadSpec names the S3 bucket/key to pull
     * @param outDir  local directory to populate; created if absent
     * @throws SegmentLoadingException if the object is missing or the pull fails
     */
    @Override
    public void getSegmentFiles(final DataSegment segment, final File outDir) throws SegmentLoadingException {
        final S3Coords s3Coords = new S3Coords(segment);

        log.info("Pulling index at path[%s] to outDir[%s]", s3Coords, outDir);

        if (!isObjectInBucket(s3Coords)) {
            throw new SegmentLoadingException("IndexFile[%s] does not exist.", s3Coords);
        }

        if (!outDir.exists()) {
            outDir.mkdirs(); // failure is caught by the isDirectory() check below
        }

        if (!outDir.isDirectory()) {
            throw new ISE("outDir[%s] must be a directory.", outDir);
        }

        try {
            S3Utils.retryS3Operation(new Callable<Void>() {
                @Override
                public Void call() throws Exception {
                    long startTime = System.currentTimeMillis();
                    S3Object s3Obj = null;

                    try {
                        s3Obj = s3Client.getObject(s3Coords.bucket, s3Coords.path);

                        InputStream in = null;
                        try {
                            in = s3Obj.getDataInputStream();
                            final String key = s3Obj.getKey();
                            if (key.endsWith(".zip")) {
                                CompressionUtils.unzip(in, outDir);
                            } else if (key.endsWith(".gz")) {
                                final File outFile = new File(outDir, toFilename(key, ".gz"));
                                // Close the GZIPInputStream explicitly so the native Inflater
                                // it allocates is released promptly rather than at GC time.
                                final GZIPInputStream gzIn = new GZIPInputStream(in);
                                try {
                                    ByteStreams.copy(gzIn, Files.newOutputStreamSupplier(outFile));
                                } finally {
                                    Closeables.closeQuietly(gzIn);
                                }
                            } else {
                                ByteStreams.copy(in,
                                        Files.newOutputStreamSupplier(new File(outDir, toFilename(key, ""))));
                            }
                            log.info("Pull of file[%s] completed in %,d millis", s3Obj,
                                    System.currentTimeMillis() - startTime);
                            return null;
                        } catch (IOException e) {
                            // Don't leave a partially-extracted directory behind.
                            FileUtils.deleteDirectory(outDir);
                            throw new IOException(String.format("Problem decompressing object[%s]", s3Obj), e);
                        } finally {
                            Closeables.closeQuietly(in);
                        }
                    } finally {
                        S3Utils.closeStreamsQuietly(s3Obj);
                    }
                }
            });
        } catch (InterruptedException e) {
            // Restore the interrupt status before surfacing the failure.
            Thread.currentThread().interrupt();
            throw new SegmentLoadingException(e, e.getMessage());
        } catch (Exception e) {
            throw new SegmentLoadingException(e, e.getMessage());
        }
    }

    /**
     * Derives the local filename for an S3 key: the portion after the last '/',
     * with {@code suffix} stripped from the end.
     */
    private String toFilename(String key, final String suffix) {
        String filename = key.substring(key.lastIndexOf("/") + 1); // characters after last '/'
        filename = filename.substring(0, filename.length() - suffix.length()); // remove the suffix from the end
        return filename;
    }

    /**
     * Checks object existence in S3, retrying transient failures.
     *
     * @throws SegmentLoadingException on unrecoverable S3 or I/O failure
     */
    private boolean isObjectInBucket(final S3Coords coords) throws SegmentLoadingException {
        try {
            return S3Utils.retryS3Operation(new Callable<Boolean>() {
                @Override
                public Boolean call() throws Exception {
                    return s3Client.isObjectInBucket(coords.bucket, coords.path);
                }
            });
        } catch (InterruptedException e) {
            // Restore the interrupt status; propagate() rethrows as unchecked.
            Thread.currentThread().interrupt();
            throw Throwables.propagate(e);
        } catch (IOException e) {
            throw new SegmentLoadingException(e, "S3 fail! Key[%s]", coords);
        } catch (ServiceException e) {
            throw new SegmentLoadingException(e, "S3 fail! Key[%s]", coords);
        }
    }

    /**
     * Returns the last-modified timestamp (epoch millis) of the segment's S3 object,
     * retrying transient failures.
     */
    @Override
    public long getLastModified(DataSegment segment) throws SegmentLoadingException {
        final S3Coords coords = new S3Coords(segment);
        try {
            final StorageObject objDetails = S3Utils.retryS3Operation(new Callable<StorageObject>() {
                @Override
                public StorageObject call() throws Exception {
                    return s3Client.getObjectDetails(coords.bucket, coords.path);
                }
            });
            return objDetails.getLastModifiedDate().getTime();
        } catch (InterruptedException e) {
            // Restore the interrupt status; propagate() rethrows as unchecked.
            Thread.currentThread().interrupt();
            throw Throwables.propagate(e);
        } catch (IOException e) {
            throw new SegmentLoadingException(e, e.getMessage());
        } catch (ServiceException e) {
            throw new SegmentLoadingException(e, e.getMessage());
        }
    }

    /**
     * S3 bucket/key pair parsed from a segment's loadSpec. A leading '/' on the key
     * is stripped since S3 keys are not rooted.
     */
    private static class S3Coords {
        String bucket;
        String path;

        public S3Coords(DataSegment segment) {
            Map<String, Object> loadSpec = segment.getLoadSpec();
            bucket = MapUtils.getString(loadSpec, BUCKET);
            path = MapUtils.getString(loadSpec, KEY);
            if (path.startsWith("/")) {
                path = path.substring(1);
            }
        }

        public String toString() {
            return String.format("s3://%s/%s", bucket, path);
        }
    }
}