com.metamx.druid.loading.S3SegmentGetter.java Source code

Java tutorial

Introduction

Here is the source code for com.metamx.druid.loading.S3SegmentGetter.java

Source

/*
 * Druid - a distributed column store.
 * Copyright (C) 2012  Metamarkets Group Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */

package com.metamx.druid.loading;

import com.google.inject.Inject;
import com.metamx.common.MapUtils;
import com.metamx.common.StreamUtils;
import com.metamx.common.logger.Logger;
import com.metamx.druid.common.s3.S3Utils;
import org.apache.commons.io.FileUtils;
import org.jets3t.service.impl.rest.httpclient.RestS3Service;
import org.jets3t.service.model.S3Bucket;
import org.jets3t.service.model.S3Object;
import org.joda.time.DateTime;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Map;
import java.util.zip.GZIPInputStream;

/**
 * {@link SegmentGetter} that pulls segment index files out of S3 and keeps them in a local cache
 * directory.
 *
 * <p>The load spec must carry a {@code "bucket"} and a {@code "key"} entry. The object is downloaded
 * to a temporary file and then either renamed into place or, when the key ends in {@code .gz},
 * decompressed into place. The cache location is
 * {@code <cacheDirectory>/<bucket>/<key with a trailing ".gz" removed>}.
 */
public class S3SegmentGetter implements SegmentGetter {
    private static final Logger log = new Logger(S3SegmentGetter.class);

    // Timeout handed to StreamUtils.copyToFileAndClose for the S3 download.
    // NOTE(review): presumably milliseconds (i.e. five minutes) -- confirm against StreamUtils.
    private static final long DEFAULT_TIMEOUT = 5 * 60 * 1000;

    private static final String BUCKET = "bucket";
    private static final String KEY = "key";

    // Suffix that marks an S3 key as gzip-compressed; it is stripped from the cache file name.
    private static final String GZ_SUFFIX = ".gz";

    private final RestS3Service s3Client;
    private final S3SegmentGetterConfig config;

    @Inject
    public S3SegmentGetter(RestS3Service s3Client, S3SegmentGetterConfig config) {
        this.s3Client = s3Client;
        this.config = config;
    }

    /**
     * Makes sure the S3 object named by {@code loadSpec} is present in the local cache and returns
     * the directory that contains the cached file.
     *
     * <p>An existing cache file is reused only when its last-modified time is strictly after the
     * S3 object's; otherwise its parent directory is deleted and the object is downloaded again.
     *
     * @param loadSpec map providing the {@code "bucket"} and {@code "key"} of the S3 object
     * @return the parent directory of the cached file
     * @throws StorageAdapterLoadingException if the object does not exist in the bucket, or if
     *         downloading, decompressing or moving it fails
     */
    @Override
    public File getSegmentFiles(Map<String, Object> loadSpec) throws StorageAdapterLoadingException {
        String s3Bucket = MapUtils.getString(loadSpec, BUCKET);
        String s3Path = MapUtils.getString(loadSpec, KEY);

        log.info("Loading index at path[s3://%s/%s]", s3Bucket, s3Path);

        S3Object s3Obj = null;
        File tmpFile = null;
        File cacheFile = null;
        // Track whether we began (and finished) writing the cache file so a failed attempt never
        // leaves a truncated file behind that a later call would mistake for a fresh cache entry.
        boolean cacheWriteStarted = false;
        boolean cacheWriteFinished = false;
        try {
            if (!s3Client.isObjectInBucket(s3Bucket, s3Path)) {
                throw new StorageAdapterLoadingException("IndexFile[s3://%s/%s] does not exist.", s3Bucket, s3Path);
            }

            cacheFile = new File(config.getCacheDirectory(), computeCacheFilePath(s3Bucket, s3Path));

            if (cacheFile.exists()) {
                S3Object objDetails = s3Client.getObjectDetails(new S3Bucket(s3Bucket), s3Path);
                DateTime cacheFileLastModified = new DateTime(cacheFile.lastModified());
                DateTime s3ObjLastModified = new DateTime(objDetails.getLastModifiedDate().getTime());
                if (cacheFileLastModified.isAfter(s3ObjLastModified)) {
                    log.info("Found cacheFile[%s] with modified[%s], which is after s3Obj[%s].  Using.", cacheFile,
                            cacheFileLastModified, s3ObjLastModified);
                    return cacheFile.getParentFile();
                }
                // Stale cache entry: wipe the whole directory before re-downloading.
                FileUtils.deleteDirectory(cacheFile.getParentFile());
            }

            long currTime = System.currentTimeMillis();

            tmpFile = File.createTempFile(s3Bucket, new DateTime().toString());
            log.info("Downloading file[s3://%s/%s] to local tmpFile[%s] for cacheFile[%s]", s3Bucket, s3Path,
                    tmpFile, cacheFile);

            s3Obj = s3Client.getObject(new S3Bucket(s3Bucket), s3Path);
            StreamUtils.copyToFileAndClose(s3Obj.getDataInputStream(), tmpFile, DEFAULT_TIMEOUT);
            final long downloadEndTime = System.currentTimeMillis();
            log.info("Download of file[%s] completed in %,d millis", cacheFile, downloadEndTime - currTime);

            // mkdirs() also returns false when the directory already exists, so only complain
            // when the directory is really missing afterwards.
            final File cacheDir = cacheFile.getParentFile();
            if (!cacheDir.mkdirs() && !cacheDir.isDirectory()) {
                log.warn("Unable to make parent file[%s]", cacheDir);
            }
            cacheFile.delete();

            cacheWriteStarted = true;
            if (isGzipped(s3Path)) {
                log.info("Decompressing file[%s] to [%s]", tmpFile, cacheFile);
                StreamUtils.copyToFileAndClose(openGzip(tmpFile), cacheFile);
                if (!tmpFile.delete()) {
                    log.error("Could not delete tmpFile[%s].", tmpFile);
                }
            } else {
                log.info("Rename tmpFile[%s] to cacheFile[%s]", tmpFile, cacheFile);
                if (!tmpFile.renameTo(cacheFile)) {
                    log.warn("Error renaming tmpFile[%s] to cacheFile[%s].  Copying instead.", tmpFile, cacheFile);

                    StreamUtils.copyToFileAndClose(new FileInputStream(tmpFile), cacheFile);
                    if (!tmpFile.delete()) {
                        log.error("Could not delete tmpFile[%s].", tmpFile);
                    }
                }
            }
            cacheWriteFinished = true;

            long endTime = System.currentTimeMillis();
            log.info("Local processing of file[%s] done in %,d millis", cacheFile, endTime - downloadEndTime);

            return cacheFile.getParentFile();
        } catch (StorageAdapterLoadingException e) {
            // Already the right type (e.g. the "does not exist" case); do not wrap it again.
            throw e;
        } catch (Exception e) {
            // Pass the message as an argument, not as the format string: it may be null or
            // contain '%' characters.
            throw new StorageAdapterLoadingException(e, "%s", e.getMessage());
        } finally {
            S3Utils.closeStreamsQuietly(s3Obj);
            if (tmpFile != null && tmpFile.exists()) {
                log.warn("Deleting tmpFile[%s] in finally block.  Why?", tmpFile);
                tmpFile.delete();
            }
            if (cacheWriteStarted && !cacheWriteFinished && cacheFile.exists() && !cacheFile.delete()) {
                log.error("Could not delete partially written cacheFile[%s].", cacheFile);
            }
        }
    }

    /**
     * Deletes, recursively, the cache directory that holds the file for {@code loadSpec}.
     *
     * @param loadSpec map providing the {@code "bucket"} and {@code "key"} of the S3 object
     * @return {@code true} whenever the deletion did not throw
     * @throws StorageAdapterLoadingException if the directory could not be deleted
     */
    @Override
    public boolean cleanSegmentFiles(Map<String, Object> loadSpec) throws StorageAdapterLoadingException {
        File cacheFile = new File(config.getCacheDirectory(),
                computeCacheFilePath(MapUtils.getString(loadSpec, BUCKET), MapUtils.getString(loadSpec, KEY)));

        try {
            final File parentFile = cacheFile.getParentFile();
            log.info("Recursively deleting file[%s]", parentFile);
            FileUtils.deleteDirectory(parentFile);
        } catch (IOException e) {
            throw new StorageAdapterLoadingException(e, "%s", e.getMessage());
        }

        return true;
    }

    /** Builds the cache path {@code <bucket>/<key>}, dropping a trailing {@code .gz} from the key. */
    private String computeCacheFilePath(String s3Bucket, String s3Path) {
        final String localPath = isGzipped(s3Path)
                ? s3Path.substring(0, s3Path.length() - GZ_SUFFIX.length())
                : s3Path;
        return String.format("%s/%s", s3Bucket, localPath);
    }

    /** Tells whether the S3 key names a gzip-compressed object, judged by its {@code .gz} suffix. */
    private static boolean isGzipped(String s3Path) {
        return s3Path.endsWith(GZ_SUFFIX);
    }

    /** Opens {@code file} for gzip decompression, closing the raw stream if the header is invalid. */
    private static GZIPInputStream openGzip(File file) throws IOException {
        final FileInputStream rawIn = new FileInputStream(file);
        try {
            return new GZIPInputStream(rawIn);
        } catch (IOException e) {
            try {
                rawIn.close();
            } catch (IOException ignored) {
                // The original failure is the one worth reporting.
            }
            throw e;
        }
    }
}