Example usage for org.apache.hadoop.fs FileSystem delete

List of usage examples for org.apache.hadoop.fs FileSystem delete

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileSystem delete.

Prototype

public abstract boolean delete(Path f, boolean recursive) throws IOException;

Document

Delete a file or directory. If the path is a directory and recursive is true, the directory and its contents are deleted; if recursive is false, deleting a non-empty directory throws an exception. Returns true if the delete succeeded, false otherwise.
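
Before the project examples below, here is a minimal, self-contained sketch of calling delete directly. The path is illustrative and not taken from any of the examples: with recursive set to true the call removes a directory together with its contents, while with false it only removes files and empty directories.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Illustrative path; replace with a real output directory
        Path out = new Path("/tmp/delete-example-output");

        // Recursively delete the path if it exists; returns false when nothing was deleted
        boolean deleted = fs.delete(out, true);
        System.out.println("Deleted: " + deleted);

        fs.close();
    }
}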

Usage

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * Prepare the FileSystem and copy test files
 */
@Override
protected void setUp() throws Exception {
    // One-off initialisation
    if (!isInitialised) {
        LOG.info("setUp() called, preparing FileSystem for tests");

        // Get a handle on the configured FileSystem
        FileSystem fs = FileSystem.get(conf);

        // Delete our working directory if it already exists
        LOG.info("   ... Deleting " + workingPath.toString());
        fs.delete(workingPath, true);

        // Copy the test files
        LOG.info("   ... Copying files");
        fs.mkdirs(inputPath);
        copyFile(fs, "zip-01.zip");
        copyFile(fs, "zip-02.zip");
        copyFile(fs, "zip-03.zip");
        copyFile(fs, "zip-04.dat");
        copyFile(fs, "random.dat");
        copyFile(fs, "encrypted.zip");
        copyFile(fs, "corrupt.zip");
        fs.close();

        // Mark the one-off initialisation as done
        isInitialised = true;
    }

    // Reset ZipFileInputFormat leniency (false)
    ZipFileInputFormat.setLenient(false);
}

From source file:com.datasalt.pangool.benchmark.secondarysort.HadoopSecondarySort.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysrot <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "Hadoop Secondary Sort");
    FileSystem fS = FileSystem.get(conf);
    fS.delete(new Path(otherArgs[1]), true);

    job.setJarByClass(HadoopSecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    job.setPartitionerClass(KeyPartitioner.class);
    job.setGroupingComparatorClass(GroupingComparator.class);

    job.setMapOutputKeyClass(ComplexType.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    job.waitForCompletion(true);
}

From source file:com.datasalt.pangool.benchmark.urlresolution.HadoopUrlResolution.java

License:Apache License

public final static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: urlresolution <url-map> <url-register> <out>");
        System.exit(2);
    }
    JobConf job = new JobConf(conf);
    FileSystem fS = FileSystem.get(conf);
    fS.delete(new Path(otherArgs[2]), true);

    MultipleInputs.addInputPath(job, new Path(otherArgs[0]), TextInputFormat.class, UrlMapClass.class);
    MultipleInputs.addInputPath(job, new Path(otherArgs[1]), TextInputFormat.class, UrlRegisterMapClass.class);

    job.setJarByClass(HadoopUrlResolution.class);

    job.setPartitionerClass(KeyPartitioner.class);
    job.setOutputValueGroupingComparator(GroupingComparator.class);

    job.setMapOutputKeyClass(UrlRegJoinUrlMap.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

    Job j = new Job(job);
    j.setReducerClass(Reduce.class);
    j.waitForCompletion(true);
}

From source file:com.datasalt.pangool.benchmark.wordcount.PangoolWordCount.java

License:Apache License

public Job getJob(Configuration conf, String input, String output) throws TupleMRException, IOException {
    FileSystem fs = FileSystem.get(conf);
    fs.delete(new Path(output), true);

    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.create("word", Type.STRING));
    fields.add(Field.create("count", Type.INT));
    Schema schema = new Schema("schema", fields);

    TupleMRBuilder cg = new TupleMRBuilder(conf, "Pangool WordCount");
    cg.addIntermediateSchema(schema);
    cg.setGroupByFields("word");
    cg.setJarByClass(PangoolWordCount.class);
    cg.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class), new Split());
    cg.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class, Text.class);
    cg.setTupleReducer(new Count());
    cg.setTupleCombiner(new CountCombiner());

    return cg.createJob();
}

From source file:com.datasalt.pangool.hive.TestPangoolHiveSerDe.java

License:Apache License

@Test
public void test() throws Exception {

    String INPUT = getClass().getCanonicalName() + "-input";
    String OUTPUT = getClass().getCanonicalName() + "-output";
    int NUM_ROWS = 50;

    Configuration hConf = new Configuration();
    FileSystem fs = FileSystem.get(hConf);

    fs.delete(new Path(INPUT), true);
    fs.delete(new Path(OUTPUT), true);

    Class.forName(driverName);
    Connection con;
    if (standAloneServer) {
        // get connection
        con = DriverManager.getConnection("jdbc:hive://localhost:10000/default", "", "");
    } else {
        con = DriverManager.getConnection("jdbc:hive://", "", "");
    }

    // Writing a file with tuples
    TupleFile.Writer writer = new TupleFile.Writer(fs, hConf, new Path(INPUT), schema);
    Random rand = new Random(1);
    Tuple tuple = new Tuple(schema);

    for (int i = 0; i < NUM_ROWS; i++) {
        writer.append(fillTuple(rand, tuple));
    }

    writer.close();

    Statement stmt = con.createStatement();
    try {
        stmt.executeQuery("drop table pangool_test");
    } catch (Exception e) {
        // Do nothing. The table probably doesn't exist.
    }

    String create_table = "create external table TABLENAME ("
            + "cint int, clong bigint, cfloat float, cdouble double, cstring string, cboolean boolean, "
            + "cenum int, cbytes binary " + "ROW FORMAT SERDE 'com.datasalt.pangool.hive.PangoolHiveSerDe' "
            + "STORED AS INPUTFORMAT 'com.datasalt.pangool.hive.PangoolHiveInputFormat' "
            + "OUTPUTFORMAT 'com.datasalt.pangool.hive.PangoolHiveoOutputFormat' "
            + "TBLPROPERTIES ('schema.name'='myschema' ";

    String create_table1 = create_table.replace("TABLENAME", "table1") + "LOCATION '" + INPUT + "');";
    String create_table2 = create_table.replace("TABLENAME", "table2") + "LOCATION '" + OUTPUT + "');";

    stmt.executeQuery(create_table1);
    stmt.executeQuery(create_table2);

    stmt.executeQuery("load data inpath ");

    TupleFile.Reader readerSource = new TupleFile.Reader(fs, hConf, new Path(INPUT));
    TupleFile.Reader readerTarget = new TupleFile.Reader(fs, hConf, new Path(OUTPUT));

    ITuple sourceTuple = null;
    ITuple targetTuple = null;
    while (readerSource.next(sourceTuple)) {
        readerTarget.next(targetTuple);
        assertEqualTuples(sourceTuple, targetTuple);
    }
    assertFalse(readerTarget.next(tuple));

    readerSource.close();
    readerTarget.close();

    stmt.executeQuery("drop table pangool_test");
    stmt.close();
    con.close();

    fs.delete(new Path(INPUT), true);
    fs.delete(new Path(OUTPUT), true);
}

From source file:com.datasalt.pangool.tuplemr.mapred.TestCombiner.java

License:Apache License

public TupleMRBuilder getBuilder(Configuration conf, String input, String output)
        throws TupleMRException, IOException {
    FileSystem fs = FileSystem.get(conf);
    fs.delete(new Path(output), true);

    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.create("word", Type.STRING));
    fields.add(Field.create("count", Type.INT));

    TupleMRBuilder cg = new TupleMRBuilder(conf);
    cg.addIntermediateSchema(new Schema("schema", fields));
    cg.setJarByClass(TestCombiner.class);
    cg.addInput(new Path(input), new HadoopInputFormat(SequenceFileInputFormat.class), new Split());
    cg.setOutput(new Path(output), new HadoopOutputFormat(SequenceFileOutputFormat.class), Utf8.class,
            IntWritable.class);
    cg.setGroupByFields("word");
    cg.setOrderBy(new OrderBy().add("word", Order.ASC));
    cg.setTupleReducer(new Count());
    cg.setTupleCombiner(new CountCombiner());

    return cg;
}

From source file:com.datasalt.pangool.utils.DCUtils.java

License:Apache License

/**
 * Utility method for serializing an object and saving it in the Distributed Cache.
 * <p>
 * The file where it has been serialized will be saved into a Hadoop Configuration property so that you can call
 * {@link DCUtils#loadSerializedObjectInDC(Configuration, Class, String, boolean)} to re-instantiate the serialized instance.
 * 
 * @param obj The obj instance to serialize using Java serialization.
 * @param serializeToLocalFile The local file where the instance will be serialized. It will be copied to HDFS and then removed locally.
 * @param conf The Hadoop Configuration.
 * @throws FileNotFoundException
 * @throws IOException
 * @throws URISyntaxException
 */
public static void serializeToDC(Object obj, String serializeToLocalFile, Configuration conf)
        throws FileNotFoundException, IOException, URISyntaxException {

    File hadoopTmpDir = new File(conf.get("hadoop.tmp.dir"));
    if (!hadoopTmpDir.exists()) {
        hadoopTmpDir.mkdir();
    }
    File file = new File(hadoopTmpDir, serializeToLocalFile);
    FileSystem fS = FileSystem.get(conf);

    ObjectOutput out = new ObjectOutputStream(new FileOutputStream(file));
    out.writeObject(obj);
    out.close();

    if (fS.equals(FileSystem.getLocal(conf))) {
        return;
    }

    String tmpHdfsFolder = conf.get(HDFS_TMP_FOLDER_CONF);
    if (tmpHdfsFolder == null) {
        // Set the temporary folder for Pangool instances to the temporary folder of the user running the Job.
        // This folder is used across the cluster for locating the instances, so TaskTrackers
        // running as a different user can still find it.
        tmpHdfsFolder = conf.get("hadoop.tmp.dir");
        conf.set(HDFS_TMP_FOLDER_CONF, tmpHdfsFolder);
    }
    Path toHdfs = new Path(tmpHdfsFolder, serializeToLocalFile);
    if (fS.exists(toHdfs)) { // Delete any previous copy before uploading the new one
        fS.delete(toHdfs, false);
    }
    FileUtil.copy(FileSystem.getLocal(conf), new Path(file + ""), FileSystem.get(conf), toHdfs, true, conf);
    DistributedCache.addCacheFile(toHdfs.toUri(), conf);
}
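
A hedged usage sketch of the method above; the serialized object and the file name here are hypothetical and only illustrate the call:

Configuration conf = new Configuration();
// Hypothetical Serializable instance and target file name
MyAggregator aggregator = new MyAggregator();
DCUtils.serializeToDC(aggregator, "my-aggregator.ser", conf);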

From source file:com.datasalt.pangool.utils.HadoopUtils.java

License:Apache License

public static void deleteIfExists(FileSystem dFs, Path path) throws IOException {
    if (dFs.exists(path)) {
        dFs.delete(path, true);
    }
}
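
A minimal usage sketch of this helper; the configuration and output path are hypothetical:

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
// Hypothetical output path, removed recursively only if it already exists
HadoopUtils.deleteIfExists(fs, new Path("/tmp/job-output"));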

From source file:com.datasalt.pangool.utils.InstancesDistributor.java

License:Apache License

/**
 * Utility method for serializing an object and saving it in a way that later can be recovered
 * anywhere in the cluster.
 * <p>
 * The file where it has been serialized will be saved into a Hadoop Configuration property so that you can call
 * {@link InstancesDistributor#loadInstance(Configuration, Class, String, boolean)} to re-instantiate the serialized instance.
 * 
 * @param obj The obj instance to serialize using Java serialization.
 * @param fileName The file name where the instance will be serialized.
 * @param conf The Hadoop Configuration.
 * @throws FileNotFoundException
 * @throws IOException
 * @throws URISyntaxException
 */
public static void distribute(Object obj, String fileName, Configuration conf)
        throws FileNotFoundException, IOException, URISyntaxException {

    FileSystem fS = FileSystem.get(conf);
    // Set the temporary folder for Pangool instances to the temporary folder of the user running the Job.
    // This folder is used across the cluster for locating the instances.
    // The default value can be overridden by a user-provided one.
    String tmpHdfsFolder = conf.get(HDFS_TMP_FOLDER_CONF, DEFAULT_HDFS_TMP_FOLDER_CONF_VALUE);
    Path toHdfs = new Path(tmpHdfsFolder, fileName);
    if (fS.exists(toHdfs)) { // Delete any previous copy before writing the new one
        fS.delete(toHdfs, false);
    }

    ObjectOutput out = new ObjectOutputStream(fS.create(toHdfs));
    out.writeObject(obj);
    out.close();

    DistributedCache.addCacheFile(toHdfs.toUri(), conf);
}

From source file:com.datasalt.pangool.utils.InstancesDistributor.java

License:Apache License

/**
 * Delete a file that has been distributed using {@link #distribute(Object, String, Configuration)}.
 */
public static void removeFromCache(Configuration conf, String filename) throws IOException {
    FileSystem fS = FileSystem.get(conf);
    fS.delete(locateFileInCache(conf, filename), true);
}