Example usage for org.apache.hadoop.fs FileSystem setReplication

Introduction

This page collects example usages of org.apache.hadoop.fs.FileSystem.setReplication drawn from open-source projects.

Prototype

public boolean setReplication(Path src, short replication) throws IOException 

Document

Set the replication for an existing file.
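
A minimal standalone sketch before the project examples (the path and replication factor below are illustrative, not taken from any of the sources): setReplication returns false when the change is rejected, for example when the path is a directory or the underlying FileSystem implementation does not support per-file replication, so callers should check the result.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SetReplicationExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Illustrative path and factor; adjust for your cluster.
        Path file = new Path("/user/hadoop/important.dat");
        if (!fs.setReplication(file, (short) 5)) {
            // false means the file system rejected the change, e.g. the path
            // is a directory or per-file replication is not supported
            // (as with the local file system).
            System.err.println("Could not set replication for " + file);
        }
        fs.close();
    }
}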

Usage

From source file: org.apache.accumulo.core.security.crypto.DefaultSecretKeyEncryptionStrategy.java

License: Apache License

private void doKeyEncryptionOperation(int encryptionMode, CryptoModuleParameters params, String pathToKeyName,
        Path pathToKey, FileSystem fs) throws IOException {
    DataInputStream in = null;
    try {
        if (!fs.exists(pathToKey)) {

            if (encryptionMode == Cipher.UNWRAP_MODE) {
                log.error(
                        "There was a call to decrypt the session key but no key encryption key exists.  Either restore it, reconfigure the conf file to point to it in HDFS, or throw the affected data away and begin again.");
                throw new RuntimeException(
                        "Could not find key encryption key file in configured location in HDFS ("
                                + pathToKeyName + ")");
            } else {
                DataOutputStream out = null;
                try {
                    out = fs.create(pathToKey);
                    // Very important, lets hedge our bets
                    fs.setReplication(pathToKey, (short) 5);
                    SecureRandom random = DefaultCryptoModuleUtils.getSecureRandom(
                            params.getRandomNumberGenerator(), params.getRandomNumberGeneratorProvider());
                    int keyLength = params.getKeyLength();
                    byte[] newRandomKeyEncryptionKey = new byte[keyLength / 8];
                    random.nextBytes(newRandomKeyEncryptionKey);
                    out.writeInt(newRandomKeyEncryptionKey.length);
                    out.write(newRandomKeyEncryptionKey);
                    out.flush();
                } finally {
                    if (out != null) {
                        out.close();
                    }
                }

            }
        }
        in = fs.open(pathToKey);

        int keyEncryptionKeyLength = in.readInt();
        byte[] keyEncryptionKey = new byte[keyEncryptionKeyLength];
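        // Note: the return value of read() is not checked here; the
        // NonCachingSecretKeyEncryptionStrategy variant below validates
        // bytesRead against the stored length.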
        in.read(keyEncryptionKey);

        Cipher cipher = DefaultCryptoModuleUtils.getCipher(
                params.getAllOptions().get(Property.CRYPTO_DEFAULT_KEY_STRATEGY_CIPHER_SUITE.getKey()));

        try {
            cipher.init(encryptionMode, new SecretKeySpec(keyEncryptionKey, params.getAlgorithmName()));
        } catch (InvalidKeyException e) {
            log.error(e);
            throw new RuntimeException(e);
        }

        if (Cipher.UNWRAP_MODE == encryptionMode) {
            try {
                Key plaintextKey = cipher.unwrap(params.getEncryptedKey(), params.getAlgorithmName(),
                        Cipher.SECRET_KEY);
                params.setPlaintextKey(plaintextKey.getEncoded());
            } catch (InvalidKeyException e) {
                log.error(e);
                throw new RuntimeException(e);
            } catch (NoSuchAlgorithmException e) {
                log.error(e);
                throw new RuntimeException(e);
            }
        } else {
            Key plaintextKey = new SecretKeySpec(params.getPlaintextKey(), params.getAlgorithmName());
            try {
                byte[] encryptedSecretKey = cipher.wrap(plaintextKey);
                params.setEncryptedKey(encryptedSecretKey);
                params.setOpaqueKeyEncryptionKeyID(pathToKeyName);
            } catch (InvalidKeyException e) {
                log.error(e);
                throw new RuntimeException(e);
            } catch (IllegalBlockSizeException e) {
                log.error(e);
                throw new RuntimeException(e);
            }

        }

    } finally {
        if (in != null) {
            in.close();
        }
    }
}

From source file: org.apache.accumulo.core.security.crypto.NonCachingSecretKeyEncryptionStrategy.java

License: Apache License

private void doKeyEncryptionOperation(int encryptionMode, CryptoModuleParameters params, String pathToKeyName,
        Path pathToKey, FileSystem fs) throws IOException {
    DataInputStream in = null;
    try {
        if (!fs.exists(pathToKey)) {

            if (encryptionMode == Cipher.UNWRAP_MODE) {
                log.error("There was a call to decrypt the session key but no key encryption key exists. "
                        + "Either restore it, reconfigure the conf file to point to it in HDFS, or throw the affected data away and begin again.");
                throw new RuntimeException(
                        "Could not find key encryption key file in configured location in HDFS ("
                                + pathToKeyName + ")");
            } else {
                DataOutputStream out = null;
                try {
                    out = fs.create(pathToKey);
                    // Very important, lets hedge our bets
                    fs.setReplication(pathToKey, (short) 5);
                    SecureRandom random = DefaultCryptoModuleUtils.getSecureRandom(
                            params.getRandomNumberGenerator(), params.getRandomNumberGeneratorProvider());
                    int keyLength = params.getKeyLength();
                    byte[] newRandomKeyEncryptionKey = new byte[keyLength / 8];
                    random.nextBytes(newRandomKeyEncryptionKey);
                    out.writeInt(newRandomKeyEncryptionKey.length);
                    out.write(newRandomKeyEncryptionKey);
                    out.flush();
                } finally {
                    if (out != null) {
                        out.close();
                    }
                }

            }
        }
        in = fs.open(pathToKey);

        int keyEncryptionKeyLength = in.readInt();
        byte[] keyEncryptionKey = new byte[keyEncryptionKeyLength];
        int bytesRead = in.read(keyEncryptionKey);

        Cipher cipher = DefaultCryptoModuleUtils.getCipher(
                params.getAllOptions().get(Property.CRYPTO_DEFAULT_KEY_STRATEGY_CIPHER_SUITE.getKey()));

        // check if the number of bytes read into the array is the same as the value of the length field,
        if (bytesRead == keyEncryptionKeyLength) {
            try {
                cipher.init(encryptionMode, new SecretKeySpec(keyEncryptionKey, params.getAlgorithmName()));
            } catch (InvalidKeyException e) {
                log.error("{}", e.getMessage(), e);
                throw new RuntimeException(e);
            }

            if (Cipher.UNWRAP_MODE == encryptionMode) {
                try {
                    Key plaintextKey = cipher.unwrap(params.getEncryptedKey(), params.getAlgorithmName(),
                            Cipher.SECRET_KEY);
                    params.setPlaintextKey(plaintextKey.getEncoded());
                } catch (InvalidKeyException e) {
                    log.error("{}", e.getMessage(), e);
                    throw new RuntimeException(e);
                } catch (NoSuchAlgorithmException e) {
                    log.error("{}", e.getMessage(), e);
                    throw new RuntimeException(e);
                }
            } else {
                Key plaintextKey = new SecretKeySpec(params.getPlaintextKey(), params.getAlgorithmName());
                try {
                    byte[] encryptedSecretKey = cipher.wrap(plaintextKey);
                    params.setEncryptedKey(encryptedSecretKey);
                    params.setOpaqueKeyEncryptionKeyID(pathToKeyName);
                } catch (InvalidKeyException e) {
                    log.error("{}", e.getMessage(), e);
                    throw new RuntimeException(e);
                } catch (IllegalBlockSizeException e) {
                    log.error("{}", e.getMessage(), e);
                    throw new RuntimeException(e);
                }

            }
        } else {
            log.error("{}", "Error:bytesRead does not match EncryptionkeyLength");
            throw new IllegalArgumentException("Error:bytesRead does not match EncryptionkeyLength");
        }
    } finally {
        if (in != null) {
            in.close();
        }
    }
}

From source file: org.apache.hama.bsp.BSPJobClient.java

License: Apache License

public RunningJob submitJobInternal(BSPJob pJob, BSPJobID jobId) throws IOException {
    BSPJob job = pJob;
    job.setJobID(jobId);

    int maxTasks;
    int configured = job.getConfiguration().getInt(Constants.MAX_TASKS_PER_JOB, job.getNumBspTask());

    ClusterStatus clusterStatus = getClusterStatus(true);
    // Re-adjust the maxTasks based on cluster status.
    if (clusterStatus != null) {
        maxTasks = clusterStatus.getMaxTasks() - clusterStatus.getTasks();

        if (configured > maxTasks) {
            LOG.warn("The configured number of tasks has exceeded the maximum allowed. Job will run with "
                    + (maxTasks) + " tasks.");
            job.setNumBspTask(maxTasks);
        }
    } else {
        maxTasks = configured;
    }

    Path submitJobDir = new Path(getSystemDir(), "submit_" + Integer.toString(Math.abs(r.nextInt()), 36));
    Path submitSplitFile = new Path(submitJobDir, "job.split");
    Path submitJarFile = new Path(submitJobDir, "job.jar");
    Path submitJobFile = new Path(submitJobDir, "job.xml");
    LOG.debug("BSPJobClient.submitJobDir: " + submitJobDir);

    FileSystem fs = getFs();
    // Create a number of filenames in the BSPMaster's fs namespace
    fs.delete(submitJobDir, true);
    submitJobDir = fs.makeQualified(submitJobDir);
    submitJobDir = new Path(submitJobDir.toUri().getPath());
    FsPermission bspSysPerms = new FsPermission(JOB_DIR_PERMISSION);
    FileSystem.mkdirs(fs, submitJobDir, bspSysPerms);
    fs.mkdirs(submitJobDir);
    short replication = (short) job.getInt("bsp.submit.replication", 10);

    // only create the splits if we have an input
    if ((job.get(Constants.JOB_INPUT_DIR) != null) || (job.get("bsp.join.expr") != null)) {
        // Create the splits for the job
        LOG.debug("Creating splits at " + fs.makeQualified(submitSplitFile));

        InputSplit[] splits = job.getInputFormat().getSplits(job,
                (maxTasks > configured) ? configured : maxTasks);

        if (job.getConfiguration().getBoolean(Constants.ENABLE_RUNTIME_PARTITIONING, false)) {
            LOG.info("Run pre-partitioning job");
            job = partition(job, splits, maxTasks);
            maxTasks = job.getInt("hama.partition.count", maxTasks);
        }

        if (job.getBoolean("input.has.partitioned", false)) {
            splits = job.getInputFormat().getSplits(job, maxTasks);
        }

        if (maxTasks < splits.length) {
            throw new IOException(
                    "Job failed! The number of splits has exceeded the number of max tasks. The number of splits: "
                            + splits.length + ", The number of max tasks: " + maxTasks);
        }

        int numOfSplits = writeSplits(job, splits, submitSplitFile, maxTasks);
        if (numOfSplits > configured
                || !job.getConfiguration().getBoolean(Constants.FORCE_SET_BSP_TASKS, false)) {
            job.setNumBspTask(numOfSplits);
        }

        job.set("bsp.job.split.file", submitSplitFile.toString());
    }

    String originalJarPath = job.getJar();

    if (originalJarPath != null) { // copy jar to BSPMaster's fs
        // use jar name if job is not named.
        if ("".equals(job.getJobName())) {
            job.setJobName(new Path(originalJarPath).getName());
        }
        job.setJar(submitJarFile.toString());
        fs.copyFromLocalFile(new Path(originalJarPath), submitJarFile);

        fs.setReplication(submitJarFile, replication);
        fs.setPermission(submitJarFile, new FsPermission(JOB_FILE_PERMISSION));
    } else {
        LOG.warn("No job jar file set.  User classes may not be found. "
                + "See BSPJob#setJar(String) or check Your jar file.");
    }

    // Set the user's name and working directory
    job.setUser(getUnixUserName());
    job.set("group.name", getUnixUserGroupName(job.getUser()));
    if (job.getWorkingDirectory() == null) {
        job.setWorkingDirectory(fs.getWorkingDirectory());
    }

    // Write job file to BSPMaster's fs
    FSDataOutputStream out = FileSystem.create(fs, submitJobFile, new FsPermission(JOB_FILE_PERMISSION));

    try {
        job.writeXml(out);
    } finally {
        out.close();
    }

    return launchJob(jobId, job, submitJobFile, fs);
}

From source file: org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler.java

License: Apache License

private static void setupDistributedCache(PigContext pigContext, Configuration conf, String[] paths,
        boolean shipToCluster) throws IOException {
    // Turn on the symlink feature
    DistributedCache.createSymlink(conf);

    for (String path : paths) {
        path = path.trim();
        if (path.length() != 0) {
            Path src = new Path(path);

            // Ensure that 'src' is a valid URI
            URI srcURI = toURI(src);

            // Ship it to the cluster if necessary and add to the
            // DistributedCache
            if (shipToCluster) {
                Path dst = new Path(FileLocalizer.getTemporaryPath(pigContext).toString());
                FileSystem fs = dst.getFileSystem(conf);
                fs.copyFromLocalFile(src, dst);
                fs.setReplication(dst, (short) conf.getInt(MRConfiguration.SUMIT_REPLICATION, 3));

                // Construct the dst#srcName uri for DistributedCache
                URI dstURI = null;
                try {
                    dstURI = new URI(dst.toString() + "#" + src.getName());
                } catch (URISyntaxException ue) {
                    byte errSrc = pigContext.getErrorSource();
                    int errCode = 0;
                    switch (errSrc) {
                    case PigException.REMOTE_ENVIRONMENT:
                        errCode = 6004;
                        break;
                    case PigException.USER_ENVIRONMENT:
                        errCode = 4004;
                        break;
                    default:
                        errCode = 2037;
                        break;
                    }
                    String msg = "Invalid ship specification. " + "File doesn't exist: " + dst;
                    throw new ExecException(msg, errCode, errSrc);
                }
                addToDistributedCache(dstURI, conf);
            } else {
                addToDistributedCache(srcURI, conf);
            }
        }
    }
}

From source file: org.apache.pig.backend.hadoop.streaming.HadoopExecutableManager.java

License: Apache License

public void close() throws IOException {
    try {
        super.close();

        // Copy the secondary outputs of the task to HDFS
        if (this.scriptOutputDir == null) {
            return;
        }
        Path scriptOutputDir = new Path(this.scriptOutputDir);
        FileSystem fs = scriptOutputDir.getFileSystem(job);
        List<HandleSpec> outputSpecs = command.getHandleSpecs(Handle.OUTPUT);
        if (outputSpecs != null) {
            for (int i = 1; i < outputSpecs.size(); ++i) {
                String fileName = outputSpecs.get(i).getName();
                try {
                    int partition = job.getInt(MRConfiguration.TASK_PARTITION, -1);
                    Path dst = new Path(new Path(scriptOutputDir, fileName), getOutputName(partition));
                    fs.copyFromLocalFile(false, true, new Path(fileName), dst);
                    fs.setReplication(dst, (short) job.getInt(MRConfiguration.SUMIT_REPLICATION, 3));
                } catch (IOException ioe) {
                    int errCode = 6014;
                    String msg = "Failed to save secondary output '" + fileName + "' of task: " + taskId;
                    throw new ExecException(msg, errCode, PigException.REMOTE_ENVIRONMENT, ioe);
                }
            }
        }
    } finally {
        // Footer for stderr file of the task
        writeDebugFooter();

        // Close the stderr file on HDFS
        if (errorStream != null) {
            errorStream.close();
        }
    }
}

From source file: org.pentaho.di.job.entries.hadooptransjobexecutor.DistributedCacheUtil.java

License: Apache License

/**
 * Stages the source file or folder to a Hadoop file system and sets their permission and replication value appropriately
 * to be used with the Distributed Cache. WARNING: This will delete the contents of dest before staging the archive.
 *
 * @param source    File or folder to copy to the file system. If it is a folder all contents will be copied into dest.
 * @param fs        Hadoop file system to store the contents of the archive in
 * @param dest      Destination to copy source into. If source is a file, the new file name will be exactly dest. If source
 *                  is a folder its contents will be copied into dest. For more info see
 *                  {@link FileSystem#copyFromLocalFile(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path)}.
 * @param overwrite Should an existing file or folder be overwritten? If not an exception will be thrown.
 * @throws IOException         Destination exists is not a directory
 * @throws KettleFileException Source does not exist or destination exists and overwrite is false.
 */
public void stageForCache(FileObject source, FileSystem fs, Path dest, boolean overwrite)
        throws IOException, KettleFileException {
    if (!source.exists()) {
        throw new KettleFileException(BaseMessages.getString(DistributedCacheUtil.class,
                "DistributedCacheUtil.SourceDoesNotExist", source));
    }

    if (fs.exists(dest)) {
        if (overwrite) {
            // It is a directory, clear it out
            fs.delete(dest, true);
        } else {
            throw new KettleFileException(BaseMessages.getString(DistributedCacheUtil.class,
                    "DistributedCacheUtil.DestinationExists", dest.toUri().getPath()));
        }
    }

    // Use the same replication we'd use for submitting jobs
    short replication = (short) fs.getConf().getInt("mapred.submit.replication", 10);

    Path local = new Path(source.getURL().getPath());
    fs.copyFromLocalFile(local, dest);
    fs.setPermission(dest, CACHED_FILE_PERMISSION);
    fs.setReplication(dest, replication);
}

From source file: org.pentaho.hadoop.shim.common.DistributedCacheUtilImpl.java

License: Apache License

/**
 * Stages the source file or folder to a Hadoop file system and sets their permission and replication value
 * appropriately to be used with the Distributed Cache. WARNING: This will delete the contents of dest before staging
 * the archive.
 *
 * @param source    File or folder to copy to the file system. If it is a folder all contents will be copied into
 *                  dest.
 * @param fs        Hadoop file system to store the contents of the archive in
 * @param dest      Destination to copy source into. If source is a file, the new file name will be exactly dest. If
 *                  source is a folder its contents will be copied into dest. For more info see {@link
 *                  FileSystem#copyFromLocalFile(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path)}.
 * @param overwrite Should an existing file or folder be overwritten? If not an exception will be thrown.
 * @throws IOException         Destination exists is not a directory
 * @throws KettleFileException Source does not exist or destination exists and overwrite is false.
 */
public void stageForCache(FileObject source, FileSystem fs, Path dest, boolean overwrite, boolean isPublic)
        throws IOException, KettleFileException {
    if (!source.exists()) {
        throw new KettleFileException(BaseMessages.getString(DistributedCacheUtilImpl.class,
                "DistributedCacheUtil.SourceDoesNotExist", source));
    }

    if (fs.exists(dest)) {
        if (overwrite) {
            // It is a directory, clear it out
            fs.delete(dest, true);
        } else {
            throw new KettleFileException(BaseMessages.getString(DistributedCacheUtilImpl.class,
                    "DistributedCacheUtil.DestinationExists", dest.toUri().getPath()));
        }
    }

    // Use the same replication we'd use for submitting jobs
    short replication = (short) fs.getConf().getInt("mapred.submit.replication", 10);

    if (source.getURL().toString().endsWith(CONFIG_PROPERTIES)) {
        copyConfigProperties(source, fs, dest);
    } else {
        Path local = new Path(source.getURL().getPath());
        fs.copyFromLocalFile(local, dest);
    }

    if (isPublic) {
        fs.setPermission(dest, PUBLIC_CACHED_FILE_PERMISSION);
    } else {
        fs.setPermission(dest, CACHED_FILE_PERMISSION);
    }
    fs.setReplication(dest, replication);
}

From source file: org.pentaho.hadoop.shim.hsp101.HadoopShim.java

License: Apache License

@Override
public void onLoad(HadoopConfiguration config, HadoopConfigurationFileSystemManager fsm) throws Exception {
    fsm.addProvider(config, "hdfs", config.getIdentifier(), new HDFSFileProvider());
    setDistributedCacheUtil(new DistributedCacheUtilImpl(config) {
        /**
         * Default permission for cached files
         * <p/>
         * Not using FsPermission.createImmutable due to EOFExceptions when using it with Hadoop 0.20.2
         */
        private final FsPermission CACHED_FILE_PERMISSION = new FsPermission((short) 0755);

        public void addFileToClassPath(Path file, Configuration conf) throws IOException {
            String classpath = conf.get("mapred.job.classpath.files");
            conf.set("mapred.job.classpath.files", classpath == null ? file.toString()
                    : classpath + getClusterPathSeparator() + file.toString());
            FileSystem fs = FileSystem.get(conf);
            URI uri = fs.makeQualified(file).toUri();

            DistributedCache.addCacheFile(uri, conf);
        }

        /**
         * Stages the source file or folder to a Hadoop file system and sets their permission and replication
         * value appropriately to be used with the Distributed Cache. WARNING: This will delete the contents of
         * dest before staging the archive.
         *
         * @param source    File or folder to copy to the file system. If it is a folder all contents will be
         *                  copied into dest.
         * @param fs        Hadoop file system to store the contents of the archive in
         * @param dest      Destination to copy source into. If source is a file, the new file name will be
         *                  exactly dest. If source is a folder its contents will be copied into dest. For more
         *                  info see {@link FileSystem#copyFromLocalFile(org.apache.hadoop.fs.Path,
         *                  org.apache.hadoop.fs.Path)}.
         * @param overwrite Should an existing file or folder be overwritten? If not an exception will be
         *                  thrown.
         * @throws IOException         Destination exists is not a directory
         * @throws KettleFileException Source does not exist or destination exists and overwrite is false.
         */
        public void stageForCache(FileObject source, FileSystem fs, Path dest, boolean overwrite)
                throws IOException, KettleFileException {
            if (!source.exists()) {
                throw new KettleFileException(BaseMessages.getString(DistributedCacheUtilImpl.class,
                        "DistributedCacheUtil.SourceDoesNotExist", source));
            }

            if (fs.exists(dest)) {
                if (overwrite) {
                    // It is a directory, clear it out
                    fs.delete(dest, true);
                } else {
                    throw new KettleFileException(BaseMessages.getString(DistributedCacheUtilImpl.class,
                            "DistributedCacheUtil.DestinationExists", dest.toUri().getPath()));
                }
            }

            // Use the same replication we'd use for submitting jobs
            short replication = (short) fs.getConf().getInt("mapred.submit.replication", 10);

            copyFile(source, fs, dest, overwrite);
            fs.setReplication(dest, replication);
        }

        private void copyFile(FileObject source, FileSystem fs, Path dest, boolean overwrite)
                throws IOException {
            if (source.getType() == FileType.FOLDER) {
                fs.mkdirs(dest);
                fs.setPermission(dest, CACHED_FILE_PERMISSION);
                for (FileObject fileObject : source.getChildren()) {
                    copyFile(fileObject, fs, new Path(dest, fileObject.getName().getBaseName()), overwrite);
                }
            } else {
                try (FSDataOutputStream fsDataOutputStream = fs.create(dest, overwrite)) {
                    IOUtils.copy(source.getContent().getInputStream(), fsDataOutputStream);
                    fs.setPermission(dest, CACHED_FILE_PERMISSION);
                }
            }
        }

        public String getClusterPathSeparator() {
            return System.getProperty("hadoop.cluster.path.separator", ",");
        }
    });
}

From source file: org.springframework.data.hadoop.fs.FsShell.java

License: Apache License

private void setrep(short replication, boolean recursive, FileSystem srcFs, Path src, List<Path> waitList)
        throws IOException {
    if (srcFs.isFile(src)) {
        if (srcFs.setReplication(src, replication)) {
            if (waitList != null) {
                waitList.add(src);
            }
        } else {
            throw new HadoopException("Cannot set replication for " + src);
        }
    } else {
        if (recursive) {
            FileStatus items[] = srcFs.listStatus(src);
            if (!ObjectUtils.isEmpty(items)) {
                for (FileStatus status : items) {
                    setrep(replication, recursive, srcFs, status.getPath(), waitList);
                }
            }
        }
    }
}