io.github.retz.executor.FileManager.java Source code

Java tutorial

Introduction

Here is the source code for io.github.retz.executor.FileManager.java

Source

/**
 *    Retz
 *    Copyright (C) 2016 Nautilus Technologies, KK.
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 */
package io.github.retz.executor;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.ArchiveStreamFactory;
import org.apache.commons.io.FilenameUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.file.FileSystemException;
import java.nio.file.Paths;
import java.util.List;
import java.util.zip.GZIPInputStream;

// @doc a set of functions that download, extract files from remote object storage like HDFS or HTTP web.
// This checks local directory whether it already downloaded or not.
public class FileManager {
    private static final Logger LOG = LoggerFactory.getLogger(FileManager.class);

    static void fetchPersistentFiles(List<String> files, String destination, boolean trustPVFiles)
            throws IOException {
        for (String file : files) {
            java.nio.file.Path path = Paths.get(file).getFileName();
            if (path == null) {
                throw new FileSystemException(destination);
            }
            File f = new File(FilenameUtils.concat(destination, path.toString()));
            LOG.info("Downloading: {} as {}", file, f);
            if (f.exists()) {
                LOG.debug("File already exists: {}", f);
                if (!trustPVFiles) {
                    try {
                        boolean needsDecompression = needsDecompression(f, destination);
                        if (needsDecompression) {
                            decompress(f, destination);
                        } else {
                            LOG.info("File {} was correctly decompressed before. Skipping decompression.", file);
                        }
                    } catch (ArchiveException e) {
                        LOG.error("ArchiveException on {}: {}", f, e.getMessage());
                        e.printStackTrace();
                    }
                }
            } else if (file.startsWith("http")) {
                fetchHTTPFile(file, destination);
                decompress(f, destination);
            } else if (file.startsWith("hdfs://")) {
                fetchHDFSFile(file, destination);
                decompress(f, destination);
            } else if (file.startsWith("maprfs://")) {
                fetchHDFSFile(file, destination);
                decompress(f, destination);
            } else {
                LOG.error("Invalid URL scheme: {}", file);
            }
        }
    }

    private static void fetchHTTPFile(String file, String dest) throws IOException {
        URL url = new URL(file);

        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("GET");

        conn.setDoOutput(true);
        java.nio.file.Path path = Paths.get(file).getFileName();
        if (path == null) {
            throw new FileNotFoundException(file);
        }
        String filename = path.toString();
        InputStream input = null;
        try (FileOutputStream output = new FileOutputStream(dest + "/" + filename)) {
            input = conn.getInputStream();
            byte[] buffer = new byte[65536];
            int bytesRead = 0;
            while ((bytesRead = input.read(buffer)) != -1) {
                output.write(buffer, 0, bytesRead);
            }
        } catch (IOException e) {
            LOG.debug(e.getMessage());
            throw e;
        } finally {
            if (input != null)
                input.close();
        }
        conn.disconnect();
        LOG.info("Download finished: {}", file);
    }

    private static void fetchHDFSFile(String file, String dest) throws IOException {
        LOG.debug("Downloading {} to {} as HDFS file", file, dest);
        // TODO: make 'hadoop' command arbitrarily specifiable, but given that mesos-agent (slave) can fetch hdfs:// files, it should be also available, too
        String[] hadoopCmd = { "hadoop", "fs", "-copyToLocal", file, dest };
        LOG.debug("Command: {}", String.join(" ", hadoopCmd));
        ProcessBuilder pb = new ProcessBuilder();
        pb.command(hadoopCmd).inheritIO();

        Process p = pb.start();
        while (true) {
            try {
                int result = p.waitFor();
                if (result != 0) {
                    LOG.error("Downloading {} failed: {}", file, result);
                } else {
                    LOG.info("Download finished: {}", file);
                }
                return;
            } catch (InterruptedException e) {
                LOG.error("Download process interrupted: {}", e.getMessage()); // TODO: debug?
            }
        }
    }

    private static boolean needsDecompression(File file, String dir) throws IOException, ArchiveException {
        // To use autodetect feature of ArchiveStreamFactory, input stream must be wrapped
        // with BufferedInputStream. From commons-compress mailing list.
        LOG.debug("loading file .... {} as {}", file, file.getPath());

        boolean needsDecompression = false;
        try (ArchiveInputStream ain = createAIS(file)) {
            while (ain != null) {
                ArchiveEntry entry = ain.getNextEntry();
                if (entry == null)
                    break;

                // LOG.debug("name: {} size:{} dir: {}", entry.getName(), entry.getSize(), entry.isDirectory());
                File f = new File(FilenameUtils.concat(dir, entry.getName()));
                // LOG.debug("{} exists: {}, size:{}, dir:{}", f, f.exists(), f.length(), f.isDirectory());
                if (f.isDirectory()) {
                    continue;
                } else if (!f.exists() || entry.getSize() != f.length()) {
                    LOG.info("File {} differs: seems not decompressed before", f);
                    needsDecompression = true;
                    break;
                }
            }
        } catch (ArchiveException e) {
            needsDecompression = true;
        }
        return needsDecompression;
    }

    private static void decompress(File file, String dir) throws IOException {
        LOG.info("{} needs decompression: starting", file);
        String[] cmd = { "tar", "xf", file.getAbsolutePath(), "-C", dir };
        ProcessBuilder pb = new ProcessBuilder().command(cmd).inheritIO();
        try {
            Process p = pb.start();
            int r = p.waitFor();
            if (r == 0) {
                LOG.info("file {} successfully decompressed", file);
            } else {
                LOG.error("Failed decompression of file {}: {}", file, r);
            }
        } catch (InterruptedException e) {
            LOG.error(e.getMessage());
        }
    }

    private static ArchiveInputStream createAIS(File file)
            throws FileNotFoundException, IOException, ArchiveException {
        ArchiveStreamFactory factory = new ArchiveStreamFactory();
        InputStream in = new BufferedInputStream(new FileInputStream(file));

        if (file.getName().endsWith(".tar.gz") || file.getName().endsWith(".tgz")) {
            return factory.createArchiveInputStream(ArchiveStreamFactory.TAR, new GZIPInputStream(in));
        } else if (file.getName().endsWith(".tar.bz2") || file.getName().endsWith(".tar.xz")) { // TODO: "tar, tbz2, txz. See mesos/src/launcher/fetcher.cpp for supported formats
            LOG.error("TODO: compression on {} must be supported", file.getName());
            throw new RuntimeException();
        }
        LOG.error("Not decompressing. File with unsupported suffix: {}", file);
        return null;
    }

}