org.apache.hadoop.fs.tar.TarIndex.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.fs.tar.TarIndex.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.fs.tar;

import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Scanner;

import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.AccessControlException;

/**
 * Builds an index of the regular files contained in a tar archive and,
 * optionally, persists that index to a side-car index file.
 *
 * <p>The index maps each entry name to the size of the entry and to the
 * offset, within the tar file, at which the entry's data starts. On
 * construction an existing index file is used when one can be read;
 * otherwise the tar file itself is scanned header by header.
 *
 * <p>Index file format: one line per entry,
 * {@code <name> <size> <offset>}, separated by single spaces. The name may
 * itself contain spaces; size and offset are always the last two fields.
 *
 * @author joydip
 */
public class TarIndex {

    /** Size of a tar header block; entry data is padded to a multiple of it. */
    private static final int BLOCK_SIZE = 512;

    /** Entry name -> (size, data offset). */
    private final HashMap<String, IndexEntry> index = new HashMap<String, IndexEntry>();

    public static final Log LOG = LogFactory.getLog(TarIndex.class);
    public static final String INDEX_EXT = ".index";

    /** Immutable (size, data offset) pair describing one indexed tar entry. */
    private static class IndexEntry {
        final long size;
        final long offset;

        public IndexEntry(long size, long offset) {
            this.size = size;
            this.offset = offset;
        }
    }

    /**
     * Creates an index for the given tar file, writing the index file if it
     * had to be built, using a default {@link Configuration}.
     *
     * @param fs Underlying Hadoop FileSystem
     * @param tarPath Path to the Tar file
     * @throws IOException if the tar file cannot be read
     */
    public TarIndex(FileSystem fs, Path tarPath) throws IOException {
        this(fs, tarPath, true, new Configuration());
    }

    /**
     * Creates an index out of a tar file.
     * The index is a map from file name to (size, start offset).
     *
     * <p>The index file next to the tar is tried first, then the alternate
     * index file (see the {@code tarfs.tmp.dir} property). Only when neither
     * can be read is the tar file scanned.
     *
     * @param fs Underlying Hadoop FileSystem
     * @param tarPath Path to the Tar file
     * @param isWrite whether a freshly built index should be written to a file
     * @param conf configuration consulted for the {@code tarfs.tmp.dir} property
     * @throws IOException if the tar file cannot be read or is malformed
     */
    public TarIndex(FileSystem fs, Path tarPath, boolean isWrite, Configuration conf) throws IOException {

        Path indexPath = getIndexPath(tarPath);
        Path altIndexP = getAltIndexPath(tarPath, conf);

        if (readIndexFile(fs, indexPath) || readIndexFile(fs, altIndexP))
            return;

        buildIndexFromTar(fs, tarPath);

        if (isWrite) {
            boolean writeOK = writeIndex(fs, indexPath);

            if (!writeOK && altIndexP != null)
                writeOK = writeIndex(fs, altIndexP);

            if (!writeOK) {
                Path p = altIndexP == null ? indexPath : altIndexP;

                LOG.error("Could not create INDEX file " + p.toUri());
                if (altIndexP == null)
                    LOG.error("You can specify alternate location for index"
                            + " creation using tarfs.tmp.dir property.");

                LOG.error("Skipping writing index file.");
            }
        }
    }

    /**
     * Scans the tar file header by header and records every regular file
     * with a non-empty name. Directories are not indexed.
     *
     * <p>NOTE(review): GNU long-name and PAX extended-header entries get no
     * special treatment here; confirm whether such archives must be supported.
     *
     * @param fs Underlying Hadoop FileSystem
     * @param tarPath Path to the Tar file
     * @throws IOException on read failure, truncated header or corrupt header
     */
    private void buildIndexFromTar(FileSystem fs, Path tarPath) throws IOException {
        FSDataInputStream is = fs.open(tarPath);
        try {
            byte[] buffer = new byte[BLOCK_SIZE];

            while (readHeaderBlock(is, buffer)) {
                // The entry's data begins right after its header block.
                long currOffset = is.getPos();

                TarArchiveEntry entry;
                try {
                    entry = new TarArchiveEntry(buffer);
                } catch (IllegalArgumentException e) {
                    throw new IOException("Corrupt tar header ending at offset "
                            + currOffset + " in " + tarPath, e);
                }

                long size = entry.getSize();
                if (size < 0)
                    throw new IOException("Negative entry size in tar header ending at offset "
                            + currOffset + " in " + tarPath);

                // Index only normal files. Do not support directories yet.
                if (entry.isFile()) {
                    String name = entry.getName().trim();
                    if (!name.equals(""))
                        index.put(name, new IndexEntry(size, currOffset));
                }

                // Skip the data, rounded up to the next block boundary.
                long nextOffset = currOffset + size;
                if (nextOffset % BLOCK_SIZE != 0)
                    nextOffset = ((nextOffset / BLOCK_SIZE) + 1) * BLOCK_SIZE;
                is.seek(nextOffset);
            }
        } finally {
            // Always release the stream, including when the scan fails.
            is.close();
        }
    }

    /**
     * Fills {@code buffer} completely from the stream, looping because a
     * single {@code read} call may return fewer bytes than requested.
     *
     * @return {@code true} if a full block was read, {@code false} if the
     *         stream was already at end of file
     * @throws IOException if the stream ends in the middle of a block
     */
    private static boolean readHeaderBlock(FSDataInputStream is, byte[] buffer) throws IOException {
        int filled = 0;
        while (filled < buffer.length) {
            int n = is.read(buffer, filled, buffer.length - filled);
            if (n == -1)
                break;
            filled += n;
        }

        if (filled == 0)
            return false;
        if (filled < buffer.length)
            throw new IOException("Could not read the full header.");
        return true;
    }

    /** Returns the default index location: the tar path with {@link #INDEX_EXT} appended. */
    private Path getIndexPath(Path tarPath) {
        return new Path(tarPath.toUri() + INDEX_EXT);
    }

    /**
     * Returns the alternate index location under the directory named by the
     * {@code tarfs.tmp.dir} property, or {@code null} if it is not set.
     */
    private Path getAltIndexPath(Path tarPath, Configuration conf) {
        String tmp = conf.get("tarfs.tmp.dir", null);
        if (tmp == null)
            return null;
        return new Path(tmp + Path.SEPARATOR + tarPath + INDEX_EXT);
    }

    /**
     * Writes the index map to a file, one {@code name size offset} line per
     * entry. An existing file is never overwritten.
     *
     * @param fs file system to write to
     * @param indexPath destination of the index file
     * @return {@code true} if the index was completely written; {@code false}
     *         if the file already exists, access was denied, or a write
     *         error occurred
     * @throws IOException on file system errors other than access denial
     */
    private boolean writeIndex(FileSystem fs, Path indexPath) throws IOException {

        if (fs.exists(indexPath)) {
            LOG.error("Index file already exists. Skipping writing index.");
            return false;
        }

        OutputStream os = null;
        PrintWriter out = null;
        boolean failed;

        try {
            Path parent = indexPath.getParent();
            if (parent != null)
                fs.mkdirs(parent);

            os = fs.create(indexPath);
            out = new PrintWriter(os);

            for (java.util.Map.Entry<String, IndexEntry> e : index.entrySet()) {
                IndexEntry ie = e.getValue();
                out.println(e.getKey() + " " + ie.size + " " + ie.offset);
            }

            // PrintWriter never throws on write/close problems; it only
            // records them, so close first and then ask whether all went well.
            out.close();
            failed = out.checkError();
        } catch (AccessControlException e) {
            // Not fatal: the caller falls back to the alternate location
            // and reports the failure itself.
            return false;
        } finally {
            if (out != null)
                out.close();
            if (os != null)
                os.close();
        }

        if (failed) {
            LOG.error("I/O error while writing index file " + indexPath);
            // A truncated index could later be mistaken for a complete one,
            // so remove it (best effort).
            try {
                fs.delete(indexPath, false);
            } catch (IOException e) {
                LOG.error("Could not remove incomplete index file " + indexPath + " " + e.getMessage());
            }
            return false;
        }
        return true;
    }

    /**
     * Loads the index from an index file. Entries are added to the index
     * only if the whole file is read and parsed successfully.
     *
     * @param fs file system to read from
     * @param indexPath index file location; may be {@code null}
     * @return {@code true} if the index was loaded; {@code false} if the
     *         path is {@code null}, the file does not exist, is not
     *         readable, or is malformed
     * @throws IOException on file system errors while probing or opening
     */
    private boolean readIndexFile(FileSystem fs, Path indexPath) throws IOException {

        if (indexPath == null || !fs.exists(indexPath))
            return false;

        FSDataInputStream is = null;
        Scanner s = null;

        // Parse into a scratch map so a bad file leaves the index untouched.
        HashMap<String, IndexEntry> parsed = new HashMap<String, IndexEntry>();

        try {
            is = fs.open(indexPath);
            s = new Scanner(is);

            while (s.hasNextLine()) {
                String line = s.nextLine();

                // Names may contain spaces, so locate the two numeric
                // fields from the right-hand end of the line.
                int offsetSep = line.lastIndexOf(' ');
                int sizeSep = line.lastIndexOf(' ', offsetSep - 1);
                if (sizeSep <= 0) {
                    LOG.error("Invalid Index File: " + indexPath);
                    return false;
                }

                try {
                    long size = Long.parseLong(line.substring(sizeSep + 1, offsetSep));
                    long offset = Long.parseLong(line.substring(offsetSep + 1));
                    parsed.put(line.substring(0, sizeSep), new IndexEntry(size, offset));
                } catch (NumberFormatException e) {
                    LOG.error("Invalid Index File: " + indexPath);
                    return false;
                }
            }

            // Scanner hides read errors behind hasNextLine() == false.
            IOException readError = s.ioException();
            if (readError != null) {
                LOG.error("Can not read Index file " + indexPath + " " + readError.getMessage());
                return false;
            }

            index.putAll(parsed);
            return true;
        } catch (AccessControlException e) {
            LOG.error("Can not open Index file for reading " + indexPath + " " + e.getMessage());
            return false;
        } finally {
            if (s != null)
                s.close();
            if (is != null)
                is.close();
        }
    }

    /**
     * Looks up the index entry for a file name.
     *
     * @throws IOException if no entry with that name is indexed
     */
    private IndexEntry getIndexEntry(String name) throws IOException {
        IndexEntry ie = index.get(name);
        if (ie == null)
            throw new IOException("Requested file \"" + name + "\" does not exist inside tar.");
        return ie;
    }

    /**
     * Returns the offset within the tar file at which the named entry's data starts.
     *
     * @param name entry name as stored in the index
     * @throws IOException if no entry with that name is indexed
     */
    public long getOffset(String name) throws IOException {
        return getIndexEntry(name).offset;
    }

    /**
     * Returns the size in bytes of the named entry.
     *
     * @param name entry name as stored in the index
     * @throws IOException if no entry with that name is indexed
     */
    public long getSize(String name) throws IOException {
        return getIndexEntry(name).size;
    }

    /**
     * Returns the data offsets of all indexed entries, sorted ascending.
     */
    public long[] getOffsetList() {
        long[] offsetArr = new long[index.size()];
        int i = 0;
        for (IndexEntry ie : index.values()) {
            offsetArr[i++] = ie.offset;
        }

        // TBD: Could this sort be a bottleneck?
        Arrays.sort(offsetArr);
        return offsetArr;
    }

    /**
     * Returns the names of all indexed entries, in no particular order.
     */
    public String[] getFileList() {
        return index.keySet().toArray(new String[index.size()]);
    }
}