List of usage examples for org.apache.hadoop.fs.FileSystem.delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
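Before the project examples below, here is a minimal sketch of the call itself (the configuration, path, and error handling are illustrative assumptions, not taken from any of the sources listed): delete(f, true) removes a directory and everything under it, delete(f, false) only succeeds on a file or an empty directory, and the method returns true if the deletion went through.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path output = new Path("/tmp/job-output"); // illustrative path

        if (fs.exists(output)) {
            // recursive = true removes the directory and all of its contents;
            // the return value reports whether the deletion succeeded.
            boolean deleted = fs.delete(output, true);
            if (!deleted) {
                throw new IOException("Could not delete " + output);
            }
        }
        fs.close();
    }
}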
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/**
 * Prepare the FileSystem and copy test files
 */
@Override
protected void setUp() throws Exception {
    // One-off initialisation
    if (isInitialised == false) {
        LOG.info("setUp() called, preparing FileSystem for tests");

        FileSystem fs = FileSystem.get(conf);

        // Delete our working directory if it already exists
        LOG.info(" ... Deleting " + workingPath.toString());
        fs.delete(workingPath, true);

        // Copy the test files
        LOG.info(" ... Copying files");
        fs.mkdirs(inputPath);
        copyFile(fs, "zip-01.zip");
        copyFile(fs, "zip-02.zip");
        copyFile(fs, "zip-03.zip");
        copyFile(fs, "zip-04.dat");
        copyFile(fs, "random.dat");
        copyFile(fs, "encrypted.zip");
        copyFile(fs, "corrupt.zip");
        fs.close();

        isInitialised = true;
    }

    // Reset ZipFileInputFormat leniency (false)
    ZipFileInputFormat.setLenient(false);
}
From source file:com.datasalt.pangool.benchmark.secondarysort.HadoopSecondarySort.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysort <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "Hadoop Secondary Sort");

    FileSystem fS = FileSystem.get(conf);
    fS.delete(new Path(otherArgs[1]), true);

    job.setJarByClass(HadoopSecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    job.setPartitionerClass(KeyPartitioner.class);
    job.setGroupingComparatorClass(GroupingComparator.class);

    job.setMapOutputKeyClass(ComplexType.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    job.waitForCompletion(true);
}
From source file:com.datasalt.pangool.benchmark.urlresolution.HadoopUrlResolution.java
License:Apache License
public final static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: urlresolution <url-map> <url-register> <out>");
        System.exit(2);
    }
    JobConf job = new JobConf(conf);
    FileSystem fS = FileSystem.get(conf);
    fS.delete(new Path(otherArgs[2]), true);

    MultipleInputs.addInputPath(job, new Path(otherArgs[0]), TextInputFormat.class, UrlMapClass.class);
    MultipleInputs.addInputPath(job, new Path(otherArgs[1]), TextInputFormat.class, UrlRegisterMapClass.class);

    job.setJarByClass(HadoopUrlResolution.class);

    job.setPartitionerClass(KeyPartitioner.class);
    job.setOutputValueGroupingComparator(GroupingComparator.class);

    job.setMapOutputKeyClass(UrlRegJoinUrlMap.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

    Job j = new Job(job);
    j.setReducerClass(Reduce.class);
    j.waitForCompletion(true);
}
From source file:com.datasalt.pangool.benchmark.wordcount.PangoolWordCount.java
License:Apache License
public Job getJob(Configuration conf, String input, String output) throws TupleMRException, IOException {
    FileSystem fs = FileSystem.get(conf);
    fs.delete(new Path(output), true);

    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.create("word", Type.STRING));
    fields.add(Field.create("count", Type.INT));
    Schema schema = new Schema("schema", fields);

    TupleMRBuilder cg = new TupleMRBuilder(conf, "Pangool WordCount");
    cg.addIntermediateSchema(schema);
    cg.setGroupByFields("word");
    cg.setJarByClass(PangoolWordCount.class);
    cg.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class), new Split());
    cg.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class, Text.class);
    cg.setTupleReducer(new Count());
    cg.setTupleCombiner(new CountCombiner());

    return cg.createJob();
}
From source file:com.datasalt.pangool.hive.TestPangoolHiveSerDe.java
License:Apache License
@Test
public void test() throws Exception {
    String INPUT = getClass().getCanonicalName() + "-input";
    String OUTPUT = getClass().getCanonicalName() + "-output";
    int NUM_ROWS = 50;

    Configuration hConf = new Configuration();
    FileSystem fs = FileSystem.get(hConf);
    fs.delete(new Path(INPUT), true);
    fs.delete(new Path(OUTPUT), true);

    Class.forName(driverName);
    Connection con;
    if (standAloneServer) {
        // get connection
        con = DriverManager.getConnection("jdbc:hive://localhost:10000/default", "", "");
    } else {
        con = DriverManager.getConnection("jdbc:hive://", "", "");
    }

    // Writing a file with tuples
    TupleFile.Writer writer = new TupleFile.Writer(fs, hConf, new Path(INPUT), schema);
    Random rand = new Random(1);
    Tuple tuple = new Tuple(schema);
    for (int i = 0; i < NUM_ROWS; i++) {
        writer.append(fillTuple(rand, tuple));
    }
    writer.close();

    Statement stmt = con.createStatement();
    try {
        stmt.executeQuery("drop table pangool_test");
    } catch (Exception e) {
        // Do nothing. Probably the table doesn't exist.
    }

    String create_table = "create external table TABLENAME ("
        + "cint int, clong bigint, cfloat float, cdouble double, cstring string, cboolean boolean, "
        + "cenum int, cbytes binary "
        + "ROW FORMAT SERDE 'com.datasalt.pangool.hive.PangoolHiveSerDe' "
        + "STORED AS INPUTFORMAT 'com.datasalt.pangool.hive.PangoolHiveInputFormat' "
        + "OUTPUTFORMAT 'com.datasalt.pangool.hive.PangoolHiveoOutputFormat' "
        + "TBLPROPERTIES ('schema.name'='myschema' ";
    String create_table1 = create_table.replace("TABLENAME", "table1") + "LOCATION '" + INPUT + "');";
    String create_table2 = create_table.replace("TABLENAME", "table2") + "LOCATION '" + OUTPUT + "');";

    stmt.executeQuery();
    stmt.executeQuery("load data inpath ");

    TupleFile.Reader readerSource = new TupleFile.Reader(fs, hConf, new Path(INPUT));
    TupleFile.Reader readerTarget = new TupleFile.Reader(fs, hConf, new Path(OUTPUT));
    ITuple sourceTuple = null;
    ITuple targetTuple = null;
    while (readerSource.next(sourceTuple)) {
        readerTarget.next(targetTuple);
        assertEqualTuples(sourceTuple, targetTuple);
    }
    assertFalse(readerTarget.next(tuple));
    readerSource.close();
    readerTarget.close();

    stmt.executeQuery("drop table pangool_test");
    stmt.close();
    con.close();

    fs.delete(new Path(INPUT), true);
    fs.delete(new Path(OUTPUT), true);
}
From source file:com.datasalt.pangool.tuplemr.mapred.TestCombiner.java
License:Apache License
public TupleMRBuilder getBuilder(Configuration conf, String input, String output)
        throws TupleMRException, IOException {
    FileSystem fs = FileSystem.get(conf);
    fs.delete(new Path(output), true);

    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.create("word", Type.STRING));
    fields.add(Field.create("count", Type.INT));

    TupleMRBuilder cg = new TupleMRBuilder(conf);
    cg.addIntermediateSchema(new Schema("schema", fields));
    cg.setJarByClass(TestCombiner.class);
    cg.addInput(new Path(input), new HadoopInputFormat(SequenceFileInputFormat.class), new Split());
    cg.setOutput(new Path(output), new HadoopOutputFormat(SequenceFileOutputFormat.class), Utf8.class,
        IntWritable.class);
    cg.setGroupByFields("word");
    cg.setOrderBy(new OrderBy().add("word", Order.ASC));
    cg.setTupleReducer(new Count());
    cg.setTupleCombiner(new CountCombiner());

    return cg;
}
From source file:com.datasalt.pangool.utils.DCUtils.java
License:Apache License
/**
 * Utility method for serializing an object and saving it in the Distributed Cache.
 * <p>
 * The file where it has been serialized will be saved into a Hadoop Configuration property so that you can call
 * {@link DCUtils#loadSerializedObjectInDC(Configuration, Class, String, boolean)} to re-instantiate the serialized
 * instance.
 *
 * @param obj The obj instance to serialize using Java serialization.
 * @param serializeToLocalFile The local file where the instance will be serialized. It will be copied to the HDFS and removed.
 * @param conf The Hadoop Configuration.
 * @throws FileNotFoundException
 * @throws IOException
 * @throws URISyntaxException
 */
public static void serializeToDC(Object obj, String serializeToLocalFile, Configuration conf)
        throws FileNotFoundException, IOException, URISyntaxException {

    File hadoopTmpDir = new File(conf.get("hadoop.tmp.dir"));
    if (!hadoopTmpDir.exists()) {
        hadoopTmpDir.mkdir();
    }
    File file = new File(hadoopTmpDir, serializeToLocalFile);
    FileSystem fS = FileSystem.get(conf);

    ObjectOutput out = new ObjectOutputStream(new FileOutputStream(file));
    out.writeObject(obj);
    out.close();

    if (fS.equals(FileSystem.getLocal(conf))) {
        return;
    }

    String tmpHdfsFolder = conf.get(HDFS_TMP_FOLDER_CONF);
    if (tmpHdfsFolder == null) {
        // Set the temporary folder for Pangool instances to the tmp folder of the user that is running the Job.
        // This folder will be used across the cluster for locating the instances, so tasktrackers
        // that are run as a different user will still be able to locate this folder.
        tmpHdfsFolder = conf.get("hadoop.tmp.dir");
        conf.set(HDFS_TMP_FOLDER_CONF, tmpHdfsFolder);
    }
    Path toHdfs = new Path(tmpHdfsFolder, serializeToLocalFile);
    if (fS.exists(toHdfs)) { // Delete any previous copy before copying to the DFS again
        fS.delete(toHdfs, false);
    }
    FileUtil.copy(FileSystem.getLocal(conf), new Path(file + ""), FileSystem.get(conf), toHdfs, true, conf);
    DistributedCache.addCacheFile(toHdfs.toUri(), conf);
}
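A hedged usage sketch of the round trip the javadoc above describes; the class name MyPredicate and the generic return of loadSerializedObjectInDC are illustrative assumptions, while the loadSerializedObjectInDC parameter list is the one given in the javadoc:

// Driver side: serialize the instance, copy it to HDFS and register it in the DistributedCache
Configuration conf = new Configuration();
MyPredicate predicate = new MyPredicate(); // any Serializable object (hypothetical class)
DCUtils.serializeToDC(predicate, "my-predicate.ser", conf);

// Task side: re-instantiate the serialized instance from the cache
MyPredicate restored =
    DCUtils.loadSerializedObjectInDC(conf, MyPredicate.class, "my-predicate.ser", true);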
From source file:com.datasalt.pangool.utils.HadoopUtils.java
License:Apache License
public static void deleteIfExists(FileSystem dFs, Path path) throws IOException {
    if (dFs.exists(path)) {
        dFs.delete(path, true);
    }
}
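A hypothetical call site (the path and configuration are illustrative): clearing a job's output directory before it is handed to FileOutputFormat.setOutputPath.

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
Path out = new Path("/user/examples/wordcount-out"); // illustrative path

// Removes the directory recursively if it is there; a no-op otherwise
HadoopUtils.deleteIfExists(fs, out);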
From source file:com.datasalt.pangool.utils.InstancesDistributor.java
License:Apache License
/**
 * Utility method for serializing an object and saving it in a way that later can be recovered anywhere in the
 * cluster.
 * <p>
 * The file where it has been serialized will be saved into a Hadoop Configuration property so that you can call
 * {@link InstancesDistributor#loadInstance(Configuration, Class, String, boolean)} to re-instantiate the serialized
 * instance.
 *
 * @param obj The obj instance to serialize using Java serialization.
 * @param fileName The file name where the instance will be serialized.
 * @param conf The Hadoop Configuration.
 * @throws FileNotFoundException
 * @throws IOException
 * @throws URISyntaxException
 */
public static void distribute(Object obj, String fileName, Configuration conf)
        throws FileNotFoundException, IOException, URISyntaxException {

    FileSystem fS = FileSystem.get(conf);

    // Set the temporary folder for Pangool instances to the tmp folder of the user that is running the Job.
    // This folder will be used across the cluster for locating the instances.
    // The default value can be changed by a user-provided one.
    String tmpHdfsFolder = conf.get(HDFS_TMP_FOLDER_CONF, DEFAULT_HDFS_TMP_FOLDER_CONF_VALUE);
    Path toHdfs = new Path(tmpHdfsFolder, fileName);
    if (fS.exists(toHdfs)) { // Delete any previous copy before writing it again
        fS.delete(toHdfs, false);
    }
    ObjectOutput out = new ObjectOutputStream(fS.create(toHdfs));
    out.writeObject(obj);
    out.close();
    DistributedCache.addCacheFile(toHdfs.toUri(), conf);
}
From source file:com.datasalt.pangool.utils.InstancesDistributor.java
License:Apache License
/**
 * Delete a file that has been distributed using {@link #distribute(Object, String, Configuration)}.
 */
public static void removeFromCache(Configuration conf, String filename) throws IOException {
    FileSystem fS = FileSystem.get(conf);
    fS.delete(locateFileInCache(conf, filename), true);
}