com.pinterest.secor.util.FileUtil.java Source code

Java tutorial

Introduction

Here is the source code for com.pinterest.secor.util.FileUtil.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.pinterest.secor.util;

import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;

import java.text.SimpleDateFormat;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.Constants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.pinterest.secor.common.SecorConfig;

/**
 * File util implements utilities for interactions with the file system.
 *
 * @author Pawel Garbacki (pawel@pinterest.com)
 */
public class FileUtil {
    private static final Logger LOG = LoggerFactory.getLogger(FileUtil.class);
    private static Configuration mConf = new Configuration(true);
    private static final char[] m_digits = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd',
            'e', 'f' };
    private static final Pattern datePattern = Pattern.compile(".*dt=(\\d\\d\\d\\d-\\d\\d-\\d\\d).*");

    public static void configure(SecorConfig config) {
        if (config != null) {
            if (config.getCloudService().equals("Swift")) {
                mConf.set("fs.swift.impl", "org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem");
                mConf.set("fs.swift.service.GENERICPROJECT.auth.url", config.getSwiftAuthUrl());
                mConf.set("fs.swift.service.GENERICPROJECT.username", config.getSwiftUsername());
                mConf.set("fs.swift.service.GENERICPROJECT.tenant", config.getSwiftTenant());
                mConf.set("fs.swift.service.GENERICPROJECT.http.port", config.getSwiftPort());
                mConf.set("fs.swift.service.GENERICPROJECT.use.get.auth", config.getSwiftGetAuth());
                mConf.set("fs.swift.service.GENERICPROJECT.public", config.getSwiftPublic());
                if (config.getSwiftGetAuth().equals("true")) {
                    mConf.set("fs.swift.service.GENERICPROJECT.apikey", config.getSwiftApiKey());
                } else {
                    mConf.set("fs.swift.service.GENERICPROJECT.password", config.getSwiftPassword());
                }
            } else if (config.getCloudService().equals("S3")) {
                if (config.getAwsAccessKey().isEmpty() != config.getAwsSecretKey().isEmpty()) {
                    throw new IllegalArgumentException(
                            "Must specify both aws.access.key and aws.secret.key or neither.");
                }
                if (!config.getAwsAccessKey().isEmpty()) {
                    mConf.set(Constants.ACCESS_KEY, config.getAwsAccessKey());
                    mConf.set(Constants.SECRET_KEY, config.getAwsSecretKey());
                    mConf.set("fs.s3n.awsAccessKeyId", config.getAwsAccessKey());
                    mConf.set("fs.s3n.awsSecretAccessKey", config.getAwsSecretKey());
                }
            }
        }
    }

    public static FileSystem getFileSystem(String path) throws IOException {
        return FileSystem.get(URI.create(path), mConf);
    }

    public static boolean s3PathPrefixIsAltered(String logFileName, SecorConfig config) throws Exception {
        Date logDate = null;
        if (config.getS3AlterPathDate() != null && !config.getS3AlterPathDate().isEmpty()) {

            Date s3AlterPathDate = new SimpleDateFormat("yyyy-MM-dd").parse(config.getS3AlterPathDate());

            // logFileName contains the log path, e.g. raw_logs/secor_topic/dt=2016-04-20/3_0_0000000000000292564
            Matcher dateMatcher = datePattern.matcher(logFileName);
            if (dateMatcher.find()) {
                logDate = new SimpleDateFormat("yyyy-MM-dd").parse(dateMatcher.group(1));
            }

            if (logDate == null) {
                throw new Exception("Did not find a date in the format yyyy-MM-dd in " + logFileName);
            }

            if (!s3AlterPathDate.after(logDate)) {
                return true;
            }
        }

        return false;
    }

    public static String getS3AlternativePathPrefix(SecorConfig config) {
        return config.getS3AlternativePath();
    }

    public static String getS3AlternativePrefix(SecorConfig config) {
        return config.getS3FileSystem() + "://" + config.getS3Bucket() + "/" + config.getS3AlternativePath();
    }

    public static String getPrefix(String topic, SecorConfig config) throws IOException {
        String prefix = null;
        if (config.getCloudService().equals("Swift")) {
            String container = null;
            if (config.getSeparateContainersForTopics()) {
                if (!exists("swift://" + topic + ".GENERICPROJECT")) {
                    String containerUrl = "swift://" + topic + ".GENERICPROJECT";
                    Path containerPath = new Path(containerUrl);
                    getFileSystem(containerUrl).create(containerPath).close();
                }
                container = topic;
            } else {
                container = config.getSwiftContainer();
            }
            prefix = "swift://" + container + ".GENERICPROJECT/" + config.getSwiftPath();
        } else if (config.getCloudService().equals("S3")) {
            prefix = config.getS3Prefix();
        } else if (config.getCloudService().equals("GS")) {
            prefix = "gs://" + config.getGsBucket() + "/" + config.getGsPath();
        } else if (config.getCloudService().equals("Azure")) {
            prefix = "azure://" + config.getAzureContainer() + "/" + config.getAzurePath();
        }
        return prefix;
    }

    public static String[] list(String path) throws IOException {
        FileSystem fs = getFileSystem(path);
        Path fsPath = new Path(path);
        ArrayList<String> paths = new ArrayList<String>();
        FileStatus[] statuses = fs.listStatus(fsPath);
        if (statuses != null) {
            for (FileStatus status : statuses) {
                Path statusPath = status.getPath();
                if (path.startsWith("s3://") || path.startsWith("s3n://") || path.startsWith("s3a://")
                        || path.startsWith("swift://") || path.startsWith("gs://")) {
                    paths.add(statusPath.toUri().toString());
                } else {
                    paths.add(statusPath.toUri().getPath());
                }
            }
        }
        return paths.toArray(new String[] {});
    }

    public static String[] listRecursively(String path) throws IOException {
        ArrayList<String> paths = new ArrayList<String>();
        String[] directPaths = list(path);
        for (String directPath : directPaths) {
            if (directPath.equals(path)) {
                assert directPaths.length == 1 : Integer.toString(directPaths.length) + " == 1";
                paths.add(directPath);
            } else {
                String[] recursivePaths = listRecursively(directPath);
                paths.addAll(Arrays.asList(recursivePaths));
            }
        }
        return paths.toArray(new String[] {});
    }

    public static boolean exists(String path) throws IOException {
        FileSystem fs = getFileSystem(path);
        Path fsPath = new Path(path);
        return fs.exists(fsPath);
    }

    public static void delete(String path) throws IOException {
        if (exists(path)) {
            Path fsPath = new Path(path);
            boolean success = getFileSystem(path).delete(fsPath, true); // recursive
            if (!success) {
                throw new IOException("Failed to delete " + path);
            }
        }
    }

    public static void deleteOnExit(String path) {
        File file = new File(path);
        file.deleteOnExit();
    }

    public static void moveToCloud(String srcLocalPath, String dstCloudPath) throws IOException {
        Path srcPath = new Path(srcLocalPath);
        Path dstPath = new Path(dstCloudPath);
        getFileSystem(dstCloudPath).moveFromLocalFile(srcPath, dstPath);
    }

    public static void touch(String path) throws IOException {
        FileSystem fs = getFileSystem(path);
        Path fsPath = new Path(path);
        fs.create(fsPath).close();
    }

    public static long getModificationTimeMsRecursive(String path) throws IOException {
        FileSystem fs = getFileSystem(path);
        Path fsPath = new Path(path);
        FileStatus status = fs.getFileStatus(fsPath);
        long modificationTime = status.getModificationTime();
        FileStatus[] statuses = fs.listStatus(fsPath);
        if (statuses != null) {
            for (FileStatus fileStatus : statuses) {
                Path statusPath = fileStatus.getPath();
                String stringPath;
                if (path.startsWith("s3://") || path.startsWith("s3n://") || path.startsWith("s3a://")
                        || path.startsWith("swift://") || path.startsWith("gs://")) {
                    stringPath = statusPath.toUri().toString();
                } else {
                    stringPath = statusPath.toUri().getPath();
                }
                if (!stringPath.equals(path)) {
                    modificationTime = Math.max(modificationTime, getModificationTimeMsRecursive(stringPath));
                }
            }
        }
        return modificationTime;
    }

    /** Generate MD5 hash of topic and partitions. And extract first 4 characters of the MD5 hash.
     * @param topic
     * @param partitions
     * @return
     */
    public static String getMd5Hash(String topic, String[] partitions) {
        ArrayList<String> elements = new ArrayList<String>();
        elements.add(topic);
        for (String partition : partitions) {
            elements.add(partition);
        }
        String pathPrefix = StringUtils.join(elements, "/");
        try {
            final MessageDigest messageDigest = MessageDigest.getInstance("MD5");
            byte[] md5Bytes = messageDigest.digest(pathPrefix.getBytes("UTF-8"));
            return getHexEncode(md5Bytes).substring(0, 4);
        } catch (NoSuchAlgorithmException e) {
            LOG.error(e.getMessage());
        } catch (UnsupportedEncodingException e) {
            LOG.error(e.getMessage());
        }
        return "";
    }

    private static String getHexEncode(byte[] bytes) {
        final char[] chars = new char[bytes.length * 2];
        for (int i = 0; i < bytes.length; ++i) {
            final int cx = i * 2;
            final byte b = bytes[i];
            chars[cx] = m_digits[(b & 0xf0) >> 4];
            chars[cx + 1] = m_digits[(b & 0x0f)];
        }
        return new String(chars);
    }
}