List of usage examples for org.apache.hadoop.fs.FileSystem#setReplication
public boolean setReplication(Path src, short replication) throws IOException
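Before the project-specific examples below, here is a minimal standalone sketch of the call. The path /tmp/example.dat and the replication factor of 3 are illustrative assumptions; the method applies to an existing file and returns a boolean indicating whether the new factor was applied.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SetReplicationExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical file; replace with a path that already exists in your cluster.
        Path file = new Path("/tmp/example.dat");

        // Request three block replicas for the file; the return value
        // indicates whether the change was applied.
        boolean changed = fs.setReplication(file, (short) 3);
        System.out.println("Replication updated: " + changed);

        fs.close();
    }
}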
From source file:org.apache.accumulo.core.security.crypto.DefaultSecretKeyEncryptionStrategy.java
License:Apache License
private void doKeyEncryptionOperation(int encryptionMode, CryptoModuleParameters params, String pathToKeyName,
        Path pathToKey, FileSystem fs) throws IOException {
    DataInputStream in = null;
    try {
        if (!fs.exists(pathToKey)) {
            if (encryptionMode == Cipher.UNWRAP_MODE) {
                log.error("There was a call to decrypt the session key but no key encryption key exists. "
                        + "Either restore it, reconfigure the conf file to point to it in HDFS, or throw the affected data away and begin again.");
                throw new RuntimeException("Could not find key encryption key file in configured location in HDFS ("
                        + pathToKeyName + ")");
            } else {
                DataOutputStream out = null;
                try {
                    out = fs.create(pathToKey);
                    // Very important, lets hedge our bets
                    fs.setReplication(pathToKey, (short) 5);

                    SecureRandom random = DefaultCryptoModuleUtils.getSecureRandom(
                            params.getRandomNumberGenerator(), params.getRandomNumberGeneratorProvider());
                    int keyLength = params.getKeyLength();
                    byte[] newRandomKeyEncryptionKey = new byte[keyLength / 8];
                    random.nextBytes(newRandomKeyEncryptionKey);
                    out.writeInt(newRandomKeyEncryptionKey.length);
                    out.write(newRandomKeyEncryptionKey);
                    out.flush();
                } finally {
                    if (out != null) {
                        out.close();
                    }
                }
            }
        }

        in = fs.open(pathToKey);
        int keyEncryptionKeyLength = in.readInt();
        byte[] keyEncryptionKey = new byte[keyEncryptionKeyLength];
        in.read(keyEncryptionKey);

        Cipher cipher = DefaultCryptoModuleUtils.getCipher(
                params.getAllOptions().get(Property.CRYPTO_DEFAULT_KEY_STRATEGY_CIPHER_SUITE.getKey()));

        try {
            cipher.init(encryptionMode, new SecretKeySpec(keyEncryptionKey, params.getAlgorithmName()));
        } catch (InvalidKeyException e) {
            log.error(e);
            throw new RuntimeException(e);
        }

        if (Cipher.UNWRAP_MODE == encryptionMode) {
            try {
                Key plaintextKey = cipher.unwrap(params.getEncryptedKey(), params.getAlgorithmName(),
                        Cipher.SECRET_KEY);
                params.setPlaintextKey(plaintextKey.getEncoded());
            } catch (InvalidKeyException e) {
                log.error(e);
                throw new RuntimeException(e);
            } catch (NoSuchAlgorithmException e) {
                log.error(e);
                throw new RuntimeException(e);
            }
        } else {
            Key plaintextKey = new SecretKeySpec(params.getPlaintextKey(), params.getAlgorithmName());
            try {
                byte[] encryptedSecretKey = cipher.wrap(plaintextKey);
                params.setEncryptedKey(encryptedSecretKey);
                params.setOpaqueKeyEncryptionKeyID(pathToKeyName);
            } catch (InvalidKeyException e) {
                log.error(e);
                throw new RuntimeException(e);
            } catch (IllegalBlockSizeException e) {
                log.error(e);
                throw new RuntimeException(e);
            }
        }
    } finally {
        if (in != null) {
            in.close();
        }
    }
}
From source file:org.apache.accumulo.core.security.crypto.NonCachingSecretKeyEncryptionStrategy.java
License:Apache License
private void doKeyEncryptionOperation(int encryptionMode, CryptoModuleParameters params, String pathToKeyName,
        Path pathToKey, FileSystem fs) throws IOException {
    DataInputStream in = null;
    try {
        if (!fs.exists(pathToKey)) {
            if (encryptionMode == Cipher.UNWRAP_MODE) {
                log.error("There was a call to decrypt the session key but no key encryption key exists. "
                        + "Either restore it, reconfigure the conf file to point to it in HDFS, or throw the affected data away and begin again.");
                throw new RuntimeException("Could not find key encryption key file in configured location in HDFS ("
                        + pathToKeyName + ")");
            } else {
                DataOutputStream out = null;
                try {
                    out = fs.create(pathToKey);
                    // Very important, lets hedge our bets
                    fs.setReplication(pathToKey, (short) 5);

                    SecureRandom random = DefaultCryptoModuleUtils.getSecureRandom(
                            params.getRandomNumberGenerator(), params.getRandomNumberGeneratorProvider());
                    int keyLength = params.getKeyLength();
                    byte[] newRandomKeyEncryptionKey = new byte[keyLength / 8];
                    random.nextBytes(newRandomKeyEncryptionKey);
                    out.writeInt(newRandomKeyEncryptionKey.length);
                    out.write(newRandomKeyEncryptionKey);
                    out.flush();
                } finally {
                    if (out != null) {
                        out.close();
                    }
                }
            }
        }

        in = fs.open(pathToKey);
        int keyEncryptionKeyLength = in.readInt();
        byte[] keyEncryptionKey = new byte[keyEncryptionKeyLength];
        int bytesRead = in.read(keyEncryptionKey);

        Cipher cipher = DefaultCryptoModuleUtils.getCipher(
                params.getAllOptions().get(Property.CRYPTO_DEFAULT_KEY_STRATEGY_CIPHER_SUITE.getKey()));

        // check if the number of bytes read into the array is the same as the value of the length field
        if (bytesRead == keyEncryptionKeyLength) {
            try {
                cipher.init(encryptionMode, new SecretKeySpec(keyEncryptionKey, params.getAlgorithmName()));
            } catch (InvalidKeyException e) {
                log.error("{}", e.getMessage(), e);
                throw new RuntimeException(e);
            }

            if (Cipher.UNWRAP_MODE == encryptionMode) {
                try {
                    Key plaintextKey = cipher.unwrap(params.getEncryptedKey(), params.getAlgorithmName(),
                            Cipher.SECRET_KEY);
                    params.setPlaintextKey(plaintextKey.getEncoded());
                } catch (InvalidKeyException e) {
                    log.error("{}", e.getMessage(), e);
                    throw new RuntimeException(e);
                } catch (NoSuchAlgorithmException e) {
                    log.error("{}", e.getMessage(), e);
                    throw new RuntimeException(e);
                }
            } else {
                Key plaintextKey = new SecretKeySpec(params.getPlaintextKey(), params.getAlgorithmName());
                try {
                    byte[] encryptedSecretKey = cipher.wrap(plaintextKey);
                    params.setEncryptedKey(encryptedSecretKey);
                    params.setOpaqueKeyEncryptionKeyID(pathToKeyName);
                } catch (InvalidKeyException e) {
                    log.error("{}", e.getMessage(), e);
                    throw new RuntimeException(e);
                } catch (IllegalBlockSizeException e) {
                    log.error("{}", e.getMessage(), e);
                    throw new RuntimeException(e);
                }
            }
        } else {
            log.error("{}", "Error:bytesRead does not match EncryptionkeyLength");
            throw new IllegalArgumentException("Error:bytesRead does not match EncryptionkeyLength");
        }
    } finally {
        if (in != null) {
            in.close();
        }
    }
}
From source file:org.apache.hama.bsp.BSPJobClient.java
License:Apache License
public RunningJob submitJobInternal(BSPJob pJob, BSPJobID jobId) throws IOException {
    BSPJob job = pJob;
    job.setJobID(jobId);

    int maxTasks;
    int configured = job.getConfiguration().getInt(Constants.MAX_TASKS_PER_JOB, job.getNumBspTask());

    ClusterStatus clusterStatus = getClusterStatus(true);
    // Re-adjust the maxTasks based on cluster status.
    if (clusterStatus != null) {
        maxTasks = clusterStatus.getMaxTasks() - clusterStatus.getTasks();

        if (configured > maxTasks) {
            LOG.warn("The configured number of tasks has exceeded the maximum allowed. Job will run with "
                    + (maxTasks) + " tasks.");
            job.setNumBspTask(maxTasks);
        }
    } else {
        maxTasks = configured;
    }

    Path submitJobDir = new Path(getSystemDir(), "submit_" + Integer.toString(Math.abs(r.nextInt()), 36));
    Path submitSplitFile = new Path(submitJobDir, "job.split");
    Path submitJarFile = new Path(submitJobDir, "job.jar");
    Path submitJobFile = new Path(submitJobDir, "job.xml");
    LOG.debug("BSPJobClient.submitJobDir: " + submitJobDir);
    FileSystem fs = getFs();
    // Create a number of filenames in the BSPMaster's fs namespace
    fs.delete(submitJobDir, true);
    submitJobDir = fs.makeQualified(submitJobDir);
    submitJobDir = new Path(submitJobDir.toUri().getPath());
    FsPermission bspSysPerms = new FsPermission(JOB_DIR_PERMISSION);
    FileSystem.mkdirs(fs, submitJobDir, bspSysPerms);
    fs.mkdirs(submitJobDir);
    short replication = (short) job.getInt("bsp.submit.replication", 10);

    // only create the splits if we have an input
    if ((job.get(Constants.JOB_INPUT_DIR) != null) || (job.get("bsp.join.expr") != null)) {
        // Create the splits for the job
        LOG.debug("Creating splits at " + fs.makeQualified(submitSplitFile));

        InputSplit[] splits = job.getInputFormat().getSplits(job, (maxTasks > configured) ? configured : maxTasks);

        if (job.getConfiguration().getBoolean(Constants.ENABLE_RUNTIME_PARTITIONING, false)) {
            LOG.info("Run pre-partitioning job");
            job = partition(job, splits, maxTasks);
            maxTasks = job.getInt("hama.partition.count", maxTasks);
        }

        if (job.getBoolean("input.has.partitioned", false)) {
            splits = job.getInputFormat().getSplits(job, maxTasks);
        }

        if (maxTasks < splits.length) {
            throw new IOException(
                    "Job failed! The number of splits has exceeded the number of max tasks. The number of splits: "
                            + splits.length + ", The number of max tasks: " + maxTasks);
        }

        int numOfSplits = writeSplits(job, splits, submitSplitFile, maxTasks);
        if (numOfSplits > configured
                || !job.getConfiguration().getBoolean(Constants.FORCE_SET_BSP_TASKS, false)) {
            job.setNumBspTask(numOfSplits);
        }

        job.set("bsp.job.split.file", submitSplitFile.toString());
    }

    String originalJarPath = job.getJar();

    if (originalJarPath != null) {
        // copy jar to BSPMaster's fs
        // use jar name if job is not named.
        if ("".equals(job.getJobName())) {
            job.setJobName(new Path(originalJarPath).getName());
        }
        job.setJar(submitJarFile.toString());
        fs.copyFromLocalFile(new Path(originalJarPath), submitJarFile);
        fs.setReplication(submitJarFile, replication);
        fs.setPermission(submitJarFile, new FsPermission(JOB_FILE_PERMISSION));
    } else {
        LOG.warn("No job jar file set. User classes may not be found. "
                + "See BSPJob#setJar(String) or check Your jar file.");
    }

    // Set the user's name and working directory
    job.setUser(getUnixUserName());
    job.set("group.name", getUnixUserGroupName(job.getUser()));

    if (job.getWorkingDirectory() == null) {
        job.setWorkingDirectory(fs.getWorkingDirectory());
    }

    // Write job file to BSPMaster's fs
    FSDataOutputStream out = FileSystem.create(fs, submitJobFile, new FsPermission(JOB_FILE_PERMISSION));

    try {
        job.writeXml(out);
    } finally {
        out.close();
    }

    return launchJob(jobId, job, submitJobFile, fs);
}
From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler.java
License:Apache License
private static void setupDistributedCache(PigContext pigContext, Configuration conf, String[] paths,
        boolean shipToCluster) throws IOException {
    // Turn on the symlink feature
    DistributedCache.createSymlink(conf);

    for (String path : paths) {
        path = path.trim();
        if (path.length() != 0) {
            Path src = new Path(path);

            // Ensure that 'src' is a valid URI
            URI srcURI = toURI(src);

            // Ship it to the cluster if necessary and add to the
            // DistributedCache
            if (shipToCluster) {
                Path dst = new Path(FileLocalizer.getTemporaryPath(pigContext).toString());
                FileSystem fs = dst.getFileSystem(conf);
                fs.copyFromLocalFile(src, dst);
                fs.setReplication(dst, (short) conf.getInt(MRConfiguration.SUMIT_REPLICATION, 3));

                // Construct the dst#srcName uri for DistributedCache
                URI dstURI = null;
                try {
                    dstURI = new URI(dst.toString() + "#" + src.getName());
                } catch (URISyntaxException ue) {
                    byte errSrc = pigContext.getErrorSource();
                    int errCode = 0;
                    switch (errSrc) {
                    case PigException.REMOTE_ENVIRONMENT:
                        errCode = 6004;
                        break;
                    case PigException.USER_ENVIRONMENT:
                        errCode = 4004;
                        break;
                    default:
                        errCode = 2037;
                        break;
                    }
                    String msg = "Invalid ship specification. " + "File doesn't exist: " + dst;
                    throw new ExecException(msg, errCode, errSrc);
                }
                addToDistributedCache(dstURI, conf);
            } else {
                addToDistributedCache(srcURI, conf);
            }
        }
    }
}
From source file:org.apache.pig.backend.hadoop.streaming.HadoopExecutableManager.java
License:Apache License
public void close() throws IOException {
    try {
        super.close();

        // Copy the secondary outputs of the task to HDFS
        if (this.scriptOutputDir == null) {
            return;
        }

        Path scriptOutputDir = new Path(this.scriptOutputDir);
        FileSystem fs = scriptOutputDir.getFileSystem(job);
        List<HandleSpec> outputSpecs = command.getHandleSpecs(Handle.OUTPUT);
        if (outputSpecs != null) {
            for (int i = 1; i < outputSpecs.size(); ++i) {
                String fileName = outputSpecs.get(i).getName();
                try {
                    int partition = job.getInt(MRConfiguration.TASK_PARTITION, -1);
                    Path dst = new Path(new Path(scriptOutputDir, fileName), getOutputName(partition));
                    fs.copyFromLocalFile(false, true, new Path(fileName), dst);
                    fs.setReplication(dst, (short) job.getInt(MRConfiguration.SUMIT_REPLICATION, 3));
                } catch (IOException ioe) {
                    int errCode = 6014;
                    String msg = "Failed to save secondary output '" + fileName + "' of task: " + taskId;
                    throw new ExecException(msg, errCode, PigException.REMOTE_ENVIRONMENT, ioe);
                }
            }
        }
    } finally {
        // Footer for stderr file of the task
        writeDebugFooter();

        // Close the stderr file on HDFS
        if (errorStream != null) {
            errorStream.close();
        }
    }
}
From source file:org.pentaho.di.job.entries.hadooptransjobexecutor.DistributedCacheUtil.java
License:Apache License
/**
 * Stages the source file or folder to a Hadoop file system and sets their permission and replication value
 * appropriately to be used with the Distributed Cache. WARNING: This will delete the contents of dest before
 * staging the archive.
 *
 * @param source    File or folder to copy to the file system. If it is a folder all contents will be copied into dest.
 * @param fs        Hadoop file system to store the contents of the archive in
 * @param dest      Destination to copy source into. If source is a file, the new file name will be exactly dest.
 *                  If source is a folder its contents will be copied into dest. For more info see
 *                  {@link FileSystem#copyFromLocalFile(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path)}.
 * @param overwrite Should an existing file or folder be overwritten? If not an exception will be thrown.
 * @throws IOException         Destination exists is not a directory
 * @throws KettleFileException Source does not exist or destination exists and overwrite is false.
 */
public void stageForCache(FileObject source, FileSystem fs, Path dest, boolean overwrite)
        throws IOException, KettleFileException {
    if (!source.exists()) {
        throw new KettleFileException(BaseMessages.getString(DistributedCacheUtil.class,
                "DistributedCacheUtil.SourceDoesNotExist", source));
    }

    if (fs.exists(dest)) {
        if (overwrite) {
            // It is a directory, clear it out
            fs.delete(dest, true);
        } else {
            throw new KettleFileException(BaseMessages.getString(DistributedCacheUtil.class,
                    "DistributedCacheUtil.DestinationExists", dest.toUri().getPath()));
        }
    }

    // Use the same replication we'd use for submitting jobs
    short replication = (short) fs.getConf().getInt("mapred.submit.replication", 10);

    Path local = new Path(source.getURL().getPath());
    fs.copyFromLocalFile(local, dest);
    fs.setPermission(dest, CACHED_FILE_PERMISSION);
    fs.setReplication(dest, replication);
}
From source file:org.pentaho.hadoop.shim.common.DistributedCacheUtilImpl.java
License:Apache License
/**
 * Stages the source file or folder to a Hadoop file system and sets their permission and replication value
 * appropriately to be used with the Distributed Cache. WARNING: This will delete the contents of dest before
 * staging the archive.
 *
 * @param source    File or folder to copy to the file system. If it is a folder all contents will be copied into dest.
 * @param fs        Hadoop file system to store the contents of the archive in
 * @param dest      Destination to copy source into. If source is a file, the new file name will be exactly dest.
 *                  If source is a folder its contents will be copied into dest. For more info see
 *                  {@link FileSystem#copyFromLocalFile(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path)}.
 * @param overwrite Should an existing file or folder be overwritten? If not an exception will be thrown.
 * @throws IOException         Destination exists is not a directory
 * @throws KettleFileException Source does not exist or destination exists and overwrite is false.
 */
public void stageForCache(FileObject source, FileSystem fs, Path dest, boolean overwrite, boolean isPublic)
        throws IOException, KettleFileException {
    if (!source.exists()) {
        throw new KettleFileException(BaseMessages.getString(DistributedCacheUtilImpl.class,
                "DistributedCacheUtil.SourceDoesNotExist", source));
    }

    if (fs.exists(dest)) {
        if (overwrite) {
            // It is a directory, clear it out
            fs.delete(dest, true);
        } else {
            throw new KettleFileException(BaseMessages.getString(DistributedCacheUtilImpl.class,
                    "DistributedCacheUtil.DestinationExists", dest.toUri().getPath()));
        }
    }

    // Use the same replication we'd use for submitting jobs
    short replication = (short) fs.getConf().getInt("mapred.submit.replication", 10);

    if (source.getURL().toString().endsWith(CONFIG_PROPERTIES)) {
        copyConfigProperties(source, fs, dest);
    } else {
        Path local = new Path(source.getURL().getPath());
        fs.copyFromLocalFile(local, dest);
    }

    if (isPublic) {
        fs.setPermission(dest, PUBLIC_CACHED_FILE_PERMISSION);
    } else {
        fs.setPermission(dest, CACHED_FILE_PERMISSION);
    }
    fs.setReplication(dest, replication);
}
From source file:org.pentaho.hadoop.shim.hsp101.HadoopShim.java
License:Apache License
@Override
public void onLoad(HadoopConfiguration config, HadoopConfigurationFileSystemManager fsm) throws Exception {
    fsm.addProvider(config, "hdfs", config.getIdentifier(), new HDFSFileProvider());
    setDistributedCacheUtil(new DistributedCacheUtilImpl(config) {
        /**
         * Default permission for cached files
         * <p/>
         * Not using FsPermission.createImmutable due to EOFExceptions when using it with Hadoop 0.20.2
         */
        private final FsPermission CACHED_FILE_PERMISSION = new FsPermission((short) 0755);

        public void addFileToClassPath(Path file, Configuration conf) throws IOException {
            String classpath = conf.get("mapred.job.classpath.files");
            conf.set("mapred.job.classpath.files", classpath == null ? file.toString()
                    : classpath + getClusterPathSeparator() + file.toString());
            FileSystem fs = FileSystem.get(conf);
            URI uri = fs.makeQualified(file).toUri();
            DistributedCache.addCacheFile(uri, conf);
        }

        /**
         * Stages the source file or folder to a Hadoop file system and sets their permission and replication
         * value appropriately to be used with the Distributed Cache. WARNING: This will delete the contents of
         * dest before staging the archive.
         *
         * @param source    File or folder to copy to the file system. If it is a folder all contents will be
         *                  copied into dest.
         * @param fs        Hadoop file system to store the contents of the archive in
         * @param dest      Destination to copy source into. If source is a file, the new file name will be
         *                  exactly dest. If source is a folder its contents will be copied into dest. For more
         *                  info see {@link FileSystem#copyFromLocalFile(org.apache.hadoop.fs.Path,
         *                  org.apache.hadoop.fs.Path)}.
         * @param overwrite Should an existing file or folder be overwritten? If not an exception will be thrown.
         * @throws IOException         Destination exists is not a directory
         * @throws KettleFileException Source does not exist or destination exists and overwrite is false.
         */
        public void stageForCache(FileObject source, FileSystem fs, Path dest, boolean overwrite)
                throws IOException, KettleFileException {
            if (!source.exists()) {
                throw new KettleFileException(BaseMessages.getString(DistributedCacheUtilImpl.class,
                        "DistributedCacheUtil.SourceDoesNotExist", source));
            }

            if (fs.exists(dest)) {
                if (overwrite) {
                    // It is a directory, clear it out
                    fs.delete(dest, true);
                } else {
                    throw new KettleFileException(BaseMessages.getString(DistributedCacheUtilImpl.class,
                            "DistributedCacheUtil.DestinationExists", dest.toUri().getPath()));
                }
            }

            // Use the same replication we'd use for submitting jobs
            short replication = (short) fs.getConf().getInt("mapred.submit.replication", 10);
            copyFile(source, fs, dest, overwrite);
            fs.setReplication(dest, replication);
        }

        private void copyFile(FileObject source, FileSystem fs, Path dest, boolean overwrite) throws IOException {
            if (source.getType() == FileType.FOLDER) {
                fs.mkdirs(dest);
                fs.setPermission(dest, CACHED_FILE_PERMISSION);
                for (FileObject fileObject : source.getChildren()) {
                    copyFile(fileObject, fs, new Path(dest, fileObject.getName().getBaseName()), overwrite);
                }
            } else {
                try (FSDataOutputStream fsDataOutputStream = fs.create(dest, overwrite)) {
                    IOUtils.copy(source.getContent().getInputStream(), fsDataOutputStream);
                    fs.setPermission(dest, CACHED_FILE_PERMISSION);
                }
            }
        }

        public String getClusterPathSeparator() {
            return System.getProperty("hadoop.cluster.path.separator", ",");
        }
    });
}
From source file:org.springframework.data.hadoop.fs.FsShell.java
License:Apache License
private void setrep(short replication, boolean recursive, FileSystem srcFs, Path src, List<Path> waitList)
        throws IOException {
    if (srcFs.isFile(src)) {
        if (srcFs.setReplication(src, replication)) {
            if (waitList != null) {
                waitList.add(src);
            }
        } else {
            throw new HadoopException("Cannot set replication for " + src);
        }
    } else {
        if (recursive) {
            FileStatus items[] = srcFs.listStatus(src);
            if (!ObjectUtils.isEmpty(items)) {
                for (FileStatus status : items) {
                    setrep(replication, recursive, srcFs, status.getPath(), waitList);
                }
            }
        }
    }
}