// Java tutorial: FileSystemView example
/* * Copyright 2013 Cloudera. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.cloudera.cdk.data.filesystem; import com.cloudera.cdk.data.DatasetException; import com.cloudera.cdk.data.DatasetIOException; import com.cloudera.cdk.data.DatasetReader; import com.cloudera.cdk.data.DatasetWriter; import com.cloudera.cdk.data.View; import com.cloudera.cdk.data.spi.AbstractRangeView; import com.cloudera.cdk.data.spi.StorageKey; import com.cloudera.cdk.data.spi.MarkerRange; import com.google.common.base.Function; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import javax.annotation.concurrent.Immutable; import java.io.IOException; /** * FileSystem implementation of a range-based {@link View}. * * @param <E> The type of records read and written by this view. 
*/ @Immutable class FileSystemView<E> extends AbstractRangeView<E> { private final FileSystem fs; private final Path root; FileSystemView(FileSystemDataset<E> dataset) { super(dataset); this.fs = dataset.getFileSystem(); this.root = dataset.getDirectory(); } private FileSystemView(FileSystemView<E> view, MarkerRange range) { super(view, range); this.fs = view.fs; this.root = view.root; } @Override protected FileSystemView<E> newLimitedCopy(MarkerRange newRange) { return new FileSystemView<E>(this, newRange); } @Override public DatasetReader<E> newReader() { return new MultiFileDatasetReader<E>(fs, pathIterator(), dataset.getDescriptor()); } @Override public DatasetWriter<E> newWriter() { if (dataset.getDescriptor().isPartitioned()) { return new PartitionedDatasetWriter<E>(this); } else { return FileSystemWriters.newFileWriter(fs, root, dataset.getDescriptor()); } } @Override public boolean deleteAll() { PathConversion convert = new PathConversion(); boolean deleted = false; for (StorageKey partition : partitionIterator()) { deleted = cleanlyDelete(fs, root, convert.fromKey(partition)) || deleted; } return deleted; } @Override @SuppressWarnings("unchecked") public Iterable<View<E>> getCoveringPartitions() { if (dataset.getDescriptor().isPartitioned()) { return Iterables.transform(partitionIterator(), new Function<StorageKey, View<E>>() { @Override public View<E> apply(StorageKey key) { if (key != null) { // no need for the bounds checks, use dataset.in return ((FileSystemDataset) dataset).of(key); } else { throw new DatasetException("[BUG] Null partition"); } } }); } else { return Lists.newArrayList((View<E>) this); } } PathIterator pathIterator() { final Iterable<Path> directories; if (dataset.getDescriptor().isPartitioned()) { directories = Iterables.transform(partitionIterator(), new Function<StorageKey, Path>() { private final PathConversion convert = new PathConversion(); @Override public Path apply(StorageKey key) { if (key != null) { return new Path(root, 
convert.fromKey(key)); } else { throw new DatasetException("[BUG] Null partition"); } } }); } else { directories = Lists.newArrayList(root); } return new PathIterator(fs, directories); } private FileSystemPartitionIterator partitionIterator() { try { return new FileSystemPartitionIterator(fs, root, dataset.getDescriptor().getPartitionStrategy(), range); } catch (IOException ex) { throw new DatasetException("Cannot list partitions in view:" + this, ex); } } private static boolean cleanlyDelete(FileSystem fs, Path root, Path dir) { try { boolean deleted = false; if (dir.isAbsolute()) { deleted = fs.delete(dir, true /* include any files */ ); } else { // the path should be treated as relative to the root path Path absolute = new Path(root, dir); deleted = fs.delete(absolute, true /* include any files */ ); // iterate up to the root, removing empty directories for (Path current = absolute.getParent(); !current.equals(root) && !current.isRoot(); current = current.getParent()) { final FileStatus[] stats = fs.listStatus(current); if (stats == null || stats.length == 0) { // dir is empty and should be removed deleted = fs.delete(current, true) || deleted; } else { // all parent directories will be non-empty break; } } } return deleted; } catch (IOException ex) { throw new DatasetIOException("Could not cleanly delete path:" + dir, ex); } } }