com.google.cloud.hadoop.fs.gcs.ListHelperGoogleHadoopFileSystem.java Source code

Java tutorial

Introduction

Here is the source code for com.google.cloud.hadoop.fs.gcs.ListHelperGoogleHadoopFileSystem.java

Source

/**
 * Copyright 2014 Google Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.google.cloud.hadoop.fs.gcs;

import com.google.cloud.hadoop.gcsio.FileInfo;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageItemInfo;
import com.google.cloud.hadoop.gcsio.MetadataReadOnlyGoogleCloudStorage;
import com.google.cloud.hadoop.gcsio.StorageResourceId;
import com.google.common.base.Preconditions;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

/**
 * A throwaway, metadata-only flavor of GoogleHadoopFileSystem that is backed purely by a
 * collection of file metadata handed over at construction time. It answers list/get lookups for
 * file metadata just like a normal filesystem would, which makes it usable as a heavyweight cache
 * living within a single complex top-level method call, such as globStatus.
 * <p>
 * This class is *not* meant to be used as a general-usage FileSystem.
 */
class ListHelperGoogleHadoopFileSystem extends GoogleHadoopFileSystem {

    /**
     * Builds and initializes a ListHelperGoogleHadoopFileSystem that is ready to list/get
     * FileStatus entries corresponding to {@code fileInfos}.
     *
     * @param gcsfs filesystem used to resolve parent paths and resource ids; its options are
     *     reused for the helper filesystem
     * @param fileInfos metadata entries to serve; must not be empty. The path of the first entry
     *     returned by the collection's iterator is used as the root URI for initialization.
     * @return the initialized helper filesystem
     * @throws IllegalStateException if {@code fileInfos} is empty
     * @throws IOException propagated from the underlying setup calls
     */
    public static GoogleHadoopFileSystem createInstance(GoogleCloudStorageFileSystem gcsfs,
            Collection<FileInfo> fileInfos) throws IOException {
        Preconditions.checkState(!fileInfos.isEmpty(),
                "Cannot construct ListHelperGoogleHadoopFileSystem with empty fileInfos list!");

        List<GoogleCloudStorageItemInfo> itemInfos = new ArrayList<>();
        Set<URI> knownPaths = new HashSet<>();
        URI rootUri = null;
        for (FileInfo fileInfo : fileInfos) {
            URI path = fileInfo.getPath();
            itemInfos.add(fileInfo.getItemInfo());
            knownPaths.add(path);

            // The first path encountered doubles as the root URI.
            if (rootUri == null) {
                rootUri = path;
            }
        }

        // The supplied metadata does not include entries for parent directories, yet those
        // directories are expected to exist. Fill the gaps explicitly so that
        // getFileStatus(parentOfInfo) works against the returned instance.
        for (FileInfo fileInfo : fileInfos) {
            addMissingAncestors(gcsfs, fileInfo.getPath(), itemInfos, knownPaths);
        }

        // A placeholder bucket entry suffices, since the bucket info is not relevant for the
        // listObject operations served by this instance.
        String placeholderBucket = rootUri.getAuthority();
        itemInfos.add(
                new GoogleCloudStorageItemInfo(new StorageResourceId(placeholderBucket), 0, 0, "", ""));

        GoogleCloudStorageFileSystem helperGcsFs = new GoogleCloudStorageFileSystem(
                new MetadataReadOnlyGoogleCloudStorage(itemInfos), gcsfs.getOptions());
        GoogleHadoopFileSystem helperGhfs = new ListHelperGoogleHadoopFileSystem(helperGcsFs);

        Configuration helperConfig = new Configuration();
        helperConfig.set(GoogleHadoopFileSystemBase.GCS_SYSTEM_BUCKET_KEY, placeholderBucket);
        helperConfig.setBoolean(GoogleHadoopFileSystemBase.GCS_CREATE_SYSTEM_BUCKET_KEY, false);
        helperConfig.set(GoogleHadoopFileSystemBase.GCS_WORKING_DIRECTORY_KEY, "/");

        // Passing initSuperclass == false keeps FileSystem statistics from being disturbed.
        helperGhfs.initialize(rootUri, helperConfig, false);
        return helperGhfs;
    }

    /**
     * Walks from the parent of {@code childPath} upwards until reaching a null parent or the GCS
     * root, appending a fake metadata entry to {@code itemInfos} for every ancestor that is not
     * yet present in {@code knownPaths}, and recording each such ancestor in {@code knownPaths}.
     */
    private static void addMissingAncestors(GoogleCloudStorageFileSystem gcsfs, URI childPath,
            List<GoogleCloudStorageItemInfo> itemInfos, Set<URI> knownPaths) throws IOException {
        for (URI ancestor = gcsfs.getParentPath(childPath);
                ancestor != null && !ancestor.equals(GoogleCloudStorageFileSystem.GCS_ROOT);
                ancestor = gcsfs.getParentPath(ancestor)) {
            if (knownPaths.contains(ancestor)) {
                // Already covered, either by the caller's metadata or by an earlier fake entry.
                continue;
            }
            LOG.debug("Adding fake entry for missing parent path '{}'", ancestor);
            itemInfos.add(new GoogleCloudStorageItemInfo(
                    gcsfs.getPathCodec().validatePathAndGetId(ancestor, true), 0, 0, null, null));
            knownPaths.add(ancestor);
        }
    }

    /**
     * Creates an instance wrapping the given GoogleCloudStorageFileSystem; a later initialize()
     * call will not re-initialize it.
     */
    public ListHelperGoogleHadoopFileSystem(GoogleCloudStorageFileSystem gcsfs) {
        super(gcsfs);
    }

    /**
     * Always opts out of flat globbing, so that we never end up with an infinite stack of
     * ListHelperGoogleHadoopFileSystem instances.
     */
    @Override
    boolean shouldUseFlatGlob(Path fixedPath) {
        return false;
    }
}