Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.giraph.utils; import org.apache.commons.io.FilenameUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.filecache.DistributedCache; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.log4j.Logger; import com.google.common.base.Optional; import java.io.IOException; import static org.apache.commons.io.FilenameUtils.getBaseName; /** * Helpers for dealing with {@link org.apache.hadoop.filecache.DistributedCache} */ public class DistributedCacheUtils { /** Logger */ private static final Logger LOG = Logger.getLogger(DistributedCacheUtils.class); /** Don't construct */ private DistributedCacheUtils() { } /** * Get local path to file from a DistributedCache. * * @param conf Configuration * @param pathToMatch Path that was used to insert into DistributedCache * @return Path matched, or Optional.absent() */ public static Optional<Path> getLocalCacheFile(Configuration conf, String pathToMatch) { String nameToPath = FilenameUtils.getName(pathToMatch); Path[] paths; try { paths = DistributedCache.getLocalCacheFiles(conf); } catch (IOException e) { return Optional.absent(); } for (Path path : paths) { if (FilenameUtils.getName(path.toString()).equals(nameToPath)) { return Optional.of(path); } } return Optional.absent(); } /** * Copy a file to HDFS if it is local. If the path is already in HDFS, this * call does nothing. * * @param path path to file * @param conf Configuration * @return path to file on HDFS. */ public static Path copyToHdfs(Path path, Configuration conf) { if (path.toString().startsWith("hdfs://")) { // Already on HDFS return path; } FileSystem fs = null; try { fs = FileSystem.get(conf); } catch (IOException e) { throw new IllegalArgumentException("Failed to get HDFS FileSystem", e); } String name = getBaseName(path.toString()) + "-" + System.nanoTime(); Path remotePath = new Path("/tmp/giraph", name); LOG.info("copyToHdfsIfNecessary: Copying " + path + " to " + remotePath + " on hdfs " + fs.getUri()); try { fs.copyFromLocalFile(false, true, path, remotePath); } catch (IOException e) { throw new IllegalArgumentException("Failed to copy jython script from local path " + path + " to hdfs path " + remotePath + " on hdfs " + fs.getUri(), e); } return remotePath; } /** * Copy a file to HDFS if it is local, and adds it to the distributed cache. * * @param path path to file * @param conf Configuration * @return remote path to file */ public static Path copyAndAdd(Path path, Configuration conf) { Path remotePath = copyToHdfs(path, conf); DistributedCache.addCacheFile(remotePath.toUri(), conf); return remotePath; } }