Example usage for org.apache.hadoop.fs FileSystem setReplication

Introduction

On this page you can find usage examples for org.apache.hadoop.fs FileSystem setReplication.

Prototype

public boolean setReplication(Path src, short replication) throws IOException 

Document

Set the replication for an existing file.
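
According to the FileSystem javadoc, the call returns true if successful, and false if the file does not exist or is a directory. A minimal sketch of a direct call (the file path is hypothetical):

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
Path file = new Path("/tmp/example.txt"); // hypothetical path
// Ask for three replicas; HDFS re-replicates asynchronously in the background.
boolean changed = fs.setReplication(file, (short) 3);
if (!changed) {
    System.err.println("Replication unchanged: path missing or a directory");
}
fs.close();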

Usage

From source file:com.blackberry.logdriver.util.MultiSearch.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchStringDir = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchStringDirectory input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchStringDir = args[0];
    // We are going to be reading all the files in this directory a lot. So
    // let's up the replication factor by a lot so that they're easy to read.
    for (FileStatus f : fs.listStatus(new Path(searchStringDir))) {
        fs.setReplication(f.getPath(), (short) 16);
    }

    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }

    outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(MultiSearch.class);
    jobConf.setIfUnset("mapred.job.name", "MultiSearch");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string.dir", searchStringDir);

    // This search is generally too fast to make good use of 128MB blocks, so
    // let's set the value to 256MB (if it's not set already)
    if (jobConf.get("mapred.max.split.size") == null) {
        jobConf.setLong("mapred.max.split.size", 256 * 1024 * 1024);
    }

    job.setInputFormatClass(AvroBlockInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        AvroBlockInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}
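
Note that raising the replication factor of the search-string files to 16 is a read-throughput optimization: every mapper reads these files, so spreading extra replicas across the cluster lets most of those reads be served from a nearby datanode.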

From source file:com.cloudera.hoop.client.fs.TestHoopFileSystem.java

License:Open Source License

private void testSetReplication() throws Exception {
    FileSystem fs = FileSystem.get(getHadoopConf());
    Path path = new Path(getHadoopTestDir(), "foo.txt");
    OutputStream os = fs.create(path);
    os.write(1);
    os.close();
    fs.setReplication(path, (short) 2);
    fs.close();

    Configuration conf = new Configuration();
    conf.set("fs.http.impl", HoopFileSystem.class.getName());
    fs = FileSystem.get(getJettyURL().toURI(), conf);
    fs.setReplication(path, (short) 1);
    fs.close();

    fs = FileSystem.get(getHadoopConf());
    FileStatus status1 = fs.getFileStatus(path);
    fs.close();
    Assert.assertEquals(status1.getReplication(), (short) 1);
}

From source file:com.cloudera.hoop.fs.FSSetReplication.java

License:Open Source License

/**
 * Executes the filesystem operation.
 *
 * @param fs filesystem instance to use.
 * @return <code>true</code> if the replication value was set,
 * <code>false</code> otherwise.
 * @throws IOException thrown if an IO error occurred.
 */
@Override
@SuppressWarnings("unchecked")
public JSONObject execute(FileSystem fs) throws IOException {
    boolean ret = fs.setReplication(path, replication);
    JSONObject json = new JSONObject();
    json.put("setReplication", ret);
    return json;
}
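
A sketch of how a caller might consume the result (the constructor arguments here are an assumption; only the execute contract is shown above):

JSONObject json = new FSSetReplication(new Path("/user/foo/bar.txt"), (short) 2).execute(fs);
// The "setReplication" entry carries the boolean returned by FileSystem.setReplication.
boolean ok = (Boolean) json.get("setReplication");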

From source file:com.inmobi.conduit.distcp.tools.util.DistCpUtils.java

License:Apache License

/**
 * Preserve attributes on the file at the given path, matching those of the
 * file status passed as an argument. All attributes except the block size
 * are preserved by this function.
 *
 * @param targetFS - File system
 * @param path - Path that needs to preserve original file status
 * @param srcFileStatus - Original file status
 * @param attributes - Attribute set that need to be preserved
 * @throws IOException - Exception if any (particularly relating to group/owner
 *                       change or any transient error)
 */
public static void preserve(FileSystem targetFS, Path path, FileStatus srcFileStatus,
        EnumSet<FileAttribute> attributes) throws IOException {

    FileStatus targetFileStatus = targetFS.getFileStatus(path);
    String group = targetFileStatus.getGroup();
    String user = targetFileStatus.getOwner();
    boolean chown = false;

    if (attributes.contains(FileAttribute.PERMISSION)
            && !srcFileStatus.getPermission().equals(targetFileStatus.getPermission())) {
        targetFS.setPermission(path, srcFileStatus.getPermission());
    }

    if (attributes.contains(FileAttribute.REPLICATION) && !targetFileStatus.isDir()
            && srcFileStatus.getReplication() != targetFileStatus.getReplication()) {
        targetFS.setReplication(path, srcFileStatus.getReplication());
    }

    if (attributes.contains(FileAttribute.GROUP) && !group.equals(srcFileStatus.getGroup())) {
        group = srcFileStatus.getGroup();
        chown = true;
    }

    if (attributes.contains(FileAttribute.USER) && !user.equals(srcFileStatus.getOwner())) {
        user = srcFileStatus.getOwner();
        chown = true;
    }

    if (chown) {
        targetFS.setOwner(path, user, group);
    }
}
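
A usage sketch (conf, sourceFS, and the paths are hypothetical; FileAttribute is the DistCp enum from the signature above):

FileSystem targetFS = FileSystem.get(conf);
FileStatus srcStatus = sourceFS.getFileStatus(sourcePath);
// Copy replication and permission onto the target, leaving owner and group alone.
DistCpUtils.preserve(targetFS, targetPath, srcStatus,
        EnumSet.of(FileAttribute.REPLICATION, FileAttribute.PERMISSION));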

From source file:com.mellanox.r4h.DistributedFileSystem.java

License:Apache License

@Override
public boolean setReplication(Path src, final short replication) throws IOException {
    statistics.incrementWriteOps(1);
    Path absF = fixRelativePart(src);
    return new FileSystemLinkResolver<Boolean>() {
        @Override
        public Boolean doCall(final Path p) throws IOException, UnresolvedLinkException {
            return dfs.setReplication(getPathName(p), replication);
        }

        @Override
        public Boolean next(final FileSystem fs, final Path p) throws IOException {
            return fs.setReplication(p, replication);
        }
    }.resolve(this, absF);
}

From source file:com.rim.logdriver.util.MultiSearch.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchStringDir = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchStringDirectory input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchStringDir = args[0];
    // We are going to be reading all the files in this directory a lot. So
    // let's up the replication factor by a lot so that they're easy to read.
    for (FileStatus f : fs.listStatus(new Path(searchStringDir))) {
        fs.setReplication(f.getPath(), (short) 16);
    }

    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }

    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(MultiSearch.class);
    jobConf.setIfUnset("mapred.job.name", "MultiSearch");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string.dir", searchStringDir);

    // This search is generally too fast to make good use of 128MB blocks, so
    // let's set the value to 256MB (if it's not set already)
    if (jobConf.get("mapred.max.split.size") == null) {
        jobConf.setLong("mapred.max.split.size", 256 * 1024 * 1024);
    }

    job.setInputFormatClass(AvroBlockInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        AvroBlockInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file:io.hops.erasure_coding.BaseEncodingManager.java

License:Apache License

/**
 * RAID an individual file.
 */
public static boolean doFileRaid(Configuration conf, Path sourceFile, Path parityPath, Codec codec,
        Statistics statistics, Progressable reporter, int targetRepl, int metaRepl) throws IOException {
    FileSystem srcFs = sourceFile.getFileSystem(conf);
    FileStatus sourceStatus = srcFs.getFileStatus(sourceFile);

    // extract block locations from File system
    BlockLocation[] locations = srcFs.getFileBlockLocations(sourceFile, 0, sourceStatus.getLen());
    // if the file has two blocks or fewer, there is nothing to do
    if (locations.length <= 2) {
        return false;
    }

    // add up the raw disk space occupied by this file
    long diskSpace = 0;
    for (BlockLocation l : locations) {
        diskSpace += (l.getLength() * sourceStatus.getReplication());
    }
    statistics.numProcessedBlocks += locations.length;
    statistics.processedSize += diskSpace;

    // generate parity file
    generateParityFile(conf, sourceStatus, targetRepl, reporter, srcFs, parityPath, codec, locations.length,
            sourceStatus.getReplication(), metaRepl, sourceStatus.getBlockSize());
    if (!srcFs.setReplication(sourceFile, (short) targetRepl)) {
        LOG.info("Error in reducing replication of " + sourceFile + " to " + targetRepl);
        statistics.remainingSize += diskSpace;
        return false;
    }

    diskSpace = 0;
    for (BlockLocation l : locations) {
        diskSpace += (l.getLength() * targetRepl);
    }
    statistics.remainingSize += diskSpace;

    // the metafile will have this many number of blocks
    int numMeta = locations.length / codec.stripeLength;
    if (locations.length % codec.stripeLength != 0) {
        numMeta++;
    }

    // we create numMeta for every file. This metablock has metaRepl # replicas.
    // the last block of the metafile might not be completely filled up, but we
    // ignore that for now.
    statistics.numMetaBlocks += (numMeta * metaRepl);
    statistics.metaSize += (numMeta * metaRepl * sourceStatus.getBlockSize());
    return true;
}

From source file:io.hops.erasure_coding.Encoder.java

License:Apache License

/**
 * The interface to use to generate a parity file.
 * This method can be called multiple times with the same Encoder object,
 * thus allowing reuse of the buffers allocated by the Encoder object.
 *
 * @param fs
 *     The filesystem containing the source file.
 * @param srcFile
 *     The source file.
 * @param parityFile
 *     The parity file to be generated.
 */
public void encodeFile(Configuration jobConf, FileSystem fs, Path srcFile, FileSystem parityFs, Path parityFile,
        short parityRepl, long numStripes, long blockSize, Progressable reporter, StripeReader sReader)
        throws IOException {
    long expectedParityBlocks = numStripes * codec.parityLength;
    long expectedParityFileSize = numStripes * blockSize * codec.parityLength;

    if (!parityFs.mkdirs(parityFile.getParent())) {
        throw new IOException("Could not create parent dir " + parityFile.getParent());
    }
    // delete destination if exists
    if (parityFs.exists(parityFile)) {
        parityFs.delete(parityFile, false);
    }

    // Writing out a large parity file at replication 1 is difficult since
    // some datanode could die and we would not be able to close() the file.
    // So write at replication 2 and then reduce it after close() succeeds.
    short tmpRepl = parityRepl;
    if (expectedParityBlocks >= conf.getInt("raid.encoder.largeparity.blocks", 20)) {
        if (parityRepl == 1) {
            tmpRepl = 2;
        }
    }
    FSDataOutputStream out = parityFs.create(parityFile, true, conf.getInt("io.file.buffer.size", 64 * 1024),
            tmpRepl, blockSize);

    DFSOutputStream dfsOut = (DFSOutputStream) out.getWrappedStream();
    dfsOut.enableParityStream(codec.getStripeLength(), codec.getParityLength(), srcFile.toUri().getPath());

    try {
        encodeFileToStream(fs, srcFile, parityFile, sReader, blockSize, out, reporter);
        out.close();
        out = null;
        LOG.info("Wrote parity file " + parityFile);
        FileStatus tmpStat = parityFs.getFileStatus(parityFile);
        if (tmpStat.getLen() != expectedParityFileSize) {
            throw new IOException("Expected parity size " + expectedParityFileSize + " does not match actual "
                    + tmpStat.getLen());
        }
        if (tmpRepl > parityRepl) {
            parityFs.setReplication(parityFile, parityRepl);
        }
        LOG.info("Wrote parity file " + parityFile);
    } finally {
        if (out != null) {
            out.close();
        }
    }
}
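
The write-high-then-reduce pattern used above also works on its own; a minimal sketch (fs, the path, data, and block size are hypothetical):

// Write at replication 2 so a single datanode failure cannot block close(),
// then drop to the final replication of 1 once the file is safely closed.
byte[] data = new byte[1024];
long blockSize = 128L * 1024 * 1024;
Path out = new Path("/raid/parity.tmp");
FSDataOutputStream stream = fs.create(out, true, 64 * 1024, (short) 2, blockSize);
try {
    stream.write(data);
} finally {
    stream.close();
}
fs.setReplication(out, (short) 1);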

From source file:io.hops.experiments.utils.DFSOperationsUtils.java

License:Apache License

public static void setReplication(FileSystem dfs, String pathStr) throws IOException {
    if (SERVER_LESS_MODE) {
        serverLessModeRandomWait();
        return;
    }
    dfs.setReplication(new Path(pathStr), (short) 3);
}

From source file:ml.shifu.guagua.yarn.GuaguaSplitWriter.java

License:Apache License

private static FSDataOutputStream createFile(FileSystem fs, Path splitFile, Configuration job)
        throws IOException {
    FSDataOutputStream out = FileSystem.create(fs, splitFile,
            new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION));
    int replication = job.getInt("mapred.submit.replication", 10);
    fs.setReplication(splitFile, (short) replication);
    writeSplitHeader(out);
    return out;
}
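
The higher replication of the split file follows the same reasoning as the MultiSearch example above: every task reads the split metadata when it launches, so the default of 10 for mapred.submit.replication spreads that read load across many datanodes.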