List of usage examples for org.apache.hadoop.fs FileSystem setReplication
public boolean setReplication(Path src, short replication) throws IOException
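Before the project examples below, here is a minimal sketch of the call itself. The configuration, path name, and replication factor are illustrative placeholders, not taken from any of the projects: setReplication requests a new replication factor for an existing file and returns a boolean indicating whether the request was accepted, so callers should check the result. On HDFS the actual re-replication happens asynchronously in the background.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SetReplicationExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical file; replace with a path that exists on your cluster.
        Path file = new Path("/tmp/example.txt");

        // Request three replicas for the file. The boolean only tells us
        // whether the request was accepted, not that re-replication is done.
        boolean accepted = fs.setReplication(file, (short) 3);
        if (!accepted) {
            System.err.println("setReplication was not applied for " + file);
        }
        fs.close();
    }
}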
From source file:com.blackberry.logdriver.util.MultiSearch.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner

    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchStringDir = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchStringDirectory input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchStringDir = args[0];
    // We are going to be reading all the files in this directory a lot. So
    // let's up the replication factor by a lot so that they're easy to read.
    for (FileStatus f : fs.listStatus(new Path(searchStringDir))) {
        fs.setReplication(f.getPath(), (short) 16);
    }

    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }

    outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(MultiSearch.class);
    jobConf.setIfUnset("mapred.job.name", "MultiSearch");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }
        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string.dir", searchStringDir);

    // This search is generally too fast to make good use of 128MB blocks, so
    // let's set the value to 256MB (if it's not set already)
    if (jobConf.get("mapred.max.split.size") == null) {
        jobConf.setLong("mapred.max.split.size", 256 * 1024 * 1024);
    }

    job.setInputFormatClass(AvroBlockInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    for (Path path : paths) {
        AvroBlockInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}
From source file:com.cloudera.hoop.client.fs.TestHoopFileSystem.java
License:Open Source License
private void testSetReplication() throws Exception {
    FileSystem fs = FileSystem.get(getHadoopConf());
    Path path = new Path(getHadoopTestDir(), "foo.txt");
    OutputStream os = fs.create(path);
    os.write(1);
    os.close();
    fs.close();
    fs.setReplication(path, (short) 2);

    Configuration conf = new Configuration();
    conf.set("fs.http.impl", HoopFileSystem.class.getName());
    fs = FileSystem.get(getJettyURL().toURI(), conf);
    fs.setReplication(path, (short) 1);
    fs.close();

    fs = FileSystem.get(getHadoopConf());
    FileStatus status1 = fs.getFileStatus(path);
    fs.close();
    Assert.assertEquals(status1.getReplication(), (short) 1);
}
From source file:com.cloudera.hoop.fs.FSSetReplication.java
License:Open Source License
/**
 * Executes the filesystem operation.
 *
 * @param fs filesystem instance to use.
 * @return <code>true</code> if the replication value was set,
 * <code>false</code> otherwise.
 * @throws IOException thrown if an IO error occurred.
 */
@Override
@SuppressWarnings("unchecked")
public JSONObject execute(FileSystem fs) throws IOException {
    boolean ret = fs.setReplication(path, replication);
    JSONObject json = new JSONObject();
    json.put("setReplication", ret);
    return json;
}
From source file:com.inmobi.conduit.distcp.tools.util.DistCpUtils.java
License:Apache License
/**
 * Preserves attributes on the file at the given path, matching those of
 * the file status passed as argument. All attributes except the block
 * size are preserved by this function.
 *
 * @param targetFS - File system
 * @param path - Path that needs to preserve original file status
 * @param srcFileStatus - Original file status
 * @param attributes - Attribute set that needs to be preserved
 * @throws IOException - Exception if any (particularly relating to group/owner
 *   change or any transient error)
 */
public static void preserve(FileSystem targetFS, Path path, FileStatus srcFileStatus,
        EnumSet<FileAttribute> attributes) throws IOException {

    FileStatus targetFileStatus = targetFS.getFileStatus(path);
    String group = targetFileStatus.getGroup();
    String user = targetFileStatus.getOwner();
    boolean chown = false;

    if (attributes.contains(FileAttribute.PERMISSION)
            && !srcFileStatus.getPermission().equals(targetFileStatus.getPermission())) {
        targetFS.setPermission(path, srcFileStatus.getPermission());
    }

    if (attributes.contains(FileAttribute.REPLICATION) && !targetFileStatus.isDir()
            && srcFileStatus.getReplication() != targetFileStatus.getReplication()) {
        targetFS.setReplication(path, srcFileStatus.getReplication());
    }

    if (attributes.contains(FileAttribute.GROUP) && !group.equals(srcFileStatus.getGroup())) {
        group = srcFileStatus.getGroup();
        chown = true;
    }

    if (attributes.contains(FileAttribute.USER) && !user.equals(srcFileStatus.getOwner())) {
        user = srcFileStatus.getOwner();
        chown = true;
    }

    if (chown) {
        targetFS.setOwner(path, user, group);
    }
}
From source file:com.mellanox.r4h.DistributedFileSystem.java
License:Apache License
@Override
public boolean setReplication(Path src, final short replication) throws IOException {
    statistics.incrementWriteOps(1);
    Path absF = fixRelativePart(src);
    return new FileSystemLinkResolver<Boolean>() {
        @Override
        public Boolean doCall(final Path p) throws IOException, UnresolvedLinkException {
            return dfs.setReplication(getPathName(p), replication);
        }

        @Override
        public Boolean next(final FileSystem fs, final Path p) throws IOException {
            return fs.setReplication(p, replication);
        }
    }.resolve(this, absF);
}
From source file:com.rim.logdriver.util.MultiSearch.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner

    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchStringDir = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchStringDirectory input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchStringDir = args[0];
    // We are going to be reading all the files in this directory a lot. So
    // let's up the replication factor by a lot so that they're easy to read.
    for (FileStatus f : fs.listStatus(new Path(searchStringDir))) {
        fs.setReplication(f.getPath(), (short) 16);
    }

    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }

    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(MultiSearch.class);
    jobConf.setIfUnset("mapred.job.name", "MultiSearch");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }
        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string.dir", searchStringDir);

    // This search is generally too fast to make good use of 128MB blocks, so
    // let's set the value to 256MB (if it's not set already)
    if (jobConf.get("mapred.max.split.size") == null) {
        jobConf.setLong("mapred.max.split.size", 256 * 1024 * 1024);
    }

    job.setInputFormatClass(AvroBlockInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    for (Path path : paths) {
        AvroBlockInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}
From source file:io.hops.erasure_coding.BaseEncodingManager.java
License:Apache License
/**
 * RAID an individual file.
 */
public static boolean doFileRaid(Configuration conf, Path sourceFile, Path parityPath, Codec codec,
        Statistics statistics, Progressable reporter, int targetRepl, int metaRepl) throws IOException {
    FileSystem srcFs = sourceFile.getFileSystem(conf);
    FileStatus sourceStatus = srcFs.getFileStatus(sourceFile);

    // extract block locations from the file system
    BlockLocation[] locations = srcFs.getFileBlockLocations(sourceFile, 0, sourceStatus.getLen());

    // if the file has 2 or fewer blocks, then there is nothing to do
    if (locations.length <= 2) {
        return false;
    }

    // add up the raw disk space occupied by this file
    long diskSpace = 0;
    for (BlockLocation l : locations) {
        diskSpace += (l.getLength() * sourceStatus.getReplication());
    }
    statistics.numProcessedBlocks += locations.length;
    statistics.processedSize += diskSpace;

    // generate parity file
    generateParityFile(conf, sourceStatus, targetRepl, reporter, srcFs, parityPath, codec, locations.length,
            sourceStatus.getReplication(), metaRepl, sourceStatus.getBlockSize());

    if (srcFs.setReplication(sourceFile, (short) targetRepl) == false) {
        LOG.info("Error in reducing replication of " + sourceFile + " to " + targetRepl);
        statistics.remainingSize += diskSpace;
        return false;
    }

    diskSpace = 0;
    for (BlockLocation l : locations) {
        diskSpace += (l.getLength() * targetRepl);
    }
    statistics.remainingSize += diskSpace;

    // the metafile will have this many blocks
    int numMeta = locations.length / codec.stripeLength;
    if (locations.length % codec.stripeLength != 0) {
        numMeta++;
    }

    // we create numMeta for every file. This metablock has metaRepl # replicas.
    // the last block of the metafile might not be completely filled up, but we
    // ignore that for now.
    statistics.numMetaBlocks += (numMeta * metaRepl);
    statistics.metaSize += (numMeta * metaRepl * sourceStatus.getBlockSize());
    return true;
}
From source file:io.hops.erasure_coding.Encoder.java
License:Apache License
/**
 * The interface to use to generate a parity file.
 * This method can be called multiple times with the same Encoder object,
 * thus allowing reuse of the buffers allocated by the Encoder object.
 *
 * @param fs The filesystem containing the source file.
 * @param srcFile The source file.
 * @param parityFile The parity file to be generated.
 */
public void encodeFile(Configuration jobConf, FileSystem fs, Path srcFile, FileSystem parityFs,
        Path parityFile, short parityRepl, long numStripes, long blockSize, Progressable reporter,
        StripeReader sReader) throws IOException {
    long expectedParityBlocks = numStripes * codec.parityLength;
    long expectedParityFileSize = numStripes * blockSize * codec.parityLength;

    if (!parityFs.mkdirs(parityFile.getParent())) {
        throw new IOException("Could not create parent dir " + parityFile.getParent());
    }

    // delete destination if exists
    if (parityFs.exists(parityFile)) {
        parityFs.delete(parityFile, false);
    }

    // Writing out a large parity file at replication 1 is difficult since
    // some datanode could die and we would not be able to close() the file.
    // So write at replication 2 and then reduce it after close() succeeds.
    short tmpRepl = parityRepl;
    if (expectedParityBlocks >= conf.getInt("raid.encoder.largeparity.blocks", 20)) {
        if (parityRepl == 1) {
            tmpRepl = 2;
        }
    }

    FSDataOutputStream out = parityFs.create(parityFile, true,
            conf.getInt("io.file.buffer.size", 64 * 1024), tmpRepl, blockSize);
    DFSOutputStream dfsOut = (DFSOutputStream) out.getWrappedStream();
    dfsOut.enableParityStream(codec.getStripeLength(), codec.getParityLength(), srcFile.toUri().getPath());

    try {
        encodeFileToStream(fs, srcFile, parityFile, sReader, blockSize, out, reporter);
        out.close();
        out = null;
        LOG.info("Wrote parity file " + parityFile);

        FileStatus tmpStat = parityFs.getFileStatus(parityFile);
        if (tmpStat.getLen() != expectedParityFileSize) {
            throw new IOException("Expected parity size " + expectedParityFileSize
                    + " does not match actual " + tmpStat.getLen());
        }
        if (tmpRepl > parityRepl) {
            parityFs.setReplication(parityFile, parityRepl);
        }
        LOG.info("Wrote parity file " + parityFile);
    } finally {
        if (out != null) {
            out.close();
        }
    }
}
From source file:io.hops.experiments.utils.DFSOperationsUtils.java
License:Apache License
public static void setReplication(FileSystem dfs, String pathStr) throws IOException {
    if (SERVER_LESS_MODE) {
        serverLessModeRandomWait();
        return;
    }
    dfs.setReplication(new Path(pathStr), (short) 3);
}
From source file:ml.shifu.guagua.yarn.GuaguaSplitWriter.java
License:Apache License
private static FSDataOutputStream createFile(FileSystem fs, Path splitFile, Configuration job)
        throws IOException {
    FSDataOutputStream out = FileSystem.create(fs, splitFile,
            new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION));
    int replication = job.getInt("mapred.submit.replication", 10);
    fs.setReplication(splitFile, (short) replication);
    writeSplitHeader(out);
    return out;
}