List of usage examples for org.apache.hadoop.fs.FileSystem.delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
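Before the project examples below, here is a minimal sketch of the call itself (the configuration, path, and error handling are illustrative assumptions, not taken from any of the sources listed): delete(f, true) removes a directory and everything under it, delete(f, false) only succeeds on a file or an empty directory, and the method returns true if the deletion went through.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path output = new Path("/tmp/job-output"); // illustrative path

        if (fs.exists(output)) {
            // recursive = true removes the directory and all of its contents;
            // the return value reports whether the deletion succeeded.
            boolean deleted = fs.delete(output, true);
            if (!deleted) {
                throw new IOException("Could not delete " + output);
            }
        }
        fs.close();
    }
}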
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/**
 * Prepare the FileSystem and copy test files
 */
@Override
protected void setUp() throws Exception {
    // One-off initialisation
    if (isInitialised == false) {
        LOG.info("setUp() called, preparing FileSystem for tests");

        FileSystem fs = FileSystem.get(conf);

        // Delete our working directory if it already exists
        LOG.info(" ... Deleting " + workingPath.toString());
        fs.delete(workingPath, true);

        // Copy the test files
        LOG.info(" ... Copying files");
        fs.mkdirs(inputPath);
        copyFile(fs, "zip-01.zip");
        copyFile(fs, "zip-02.zip");
        copyFile(fs, "zip-03.zip");
        copyFile(fs, "zip-04.dat");
        copyFile(fs, "random.dat");
        copyFile(fs, "encrypted.zip");
        copyFile(fs, "corrupt.zip");
        fs.close();

        isInitialised = true;
    }

    // Reset ZipFileInputFormat leniency (false)
    ZipFileInputFormat.setLenient(false);
}
From source file:com.datasalt.pangool.benchmark.secondarysort.HadoopSecondarySort.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysort <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "Hadoop Secondary Sort");

    FileSystem fS = FileSystem.get(conf);
    fS.delete(new Path(otherArgs[1]), true);

    job.setJarByClass(HadoopSecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    job.setPartitionerClass(KeyPartitioner.class);
    job.setGroupingComparatorClass(GroupingComparator.class);

    job.setMapOutputKeyClass(ComplexType.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    job.waitForCompletion(true);
}
From source file:com.datasalt.pangool.benchmark.urlresolution.HadoopUrlResolution.java
License:Apache License
public final static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: urlresolution <url-map> <url-register> <out>");
        System.exit(2);
    }
    JobConf job = new JobConf(conf);
    FileSystem fS = FileSystem.get(conf);
    fS.delete(new Path(otherArgs[2]), true);

    MultipleInputs.addInputPath(job, new Path(otherArgs[0]), TextInputFormat.class, UrlMapClass.class);
    MultipleInputs.addInputPath(job, new Path(otherArgs[1]), TextInputFormat.class, UrlRegisterMapClass.class);

    job.setJarByClass(HadoopUrlResolution.class);

    job.setPartitionerClass(KeyPartitioner.class);
    job.setOutputValueGroupingComparator(GroupingComparator.class);

    job.setMapOutputKeyClass(UrlRegJoinUrlMap.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

    Job j = new Job(job);
    j.setReducerClass(Reduce.class);
    j.waitForCompletion(true);
}
From source file:com.datasalt.pangool.benchmark.wordcount.PangoolWordCount.java
License:Apache License
public Job getJob(Configuration conf, String input, String output) throws TupleMRException, IOException {
    FileSystem fs = FileSystem.get(conf);
    fs.delete(new Path(output), true);

    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.create("word", Type.STRING));
    fields.add(Field.create("count", Type.INT));
    Schema schema = new Schema("schema", fields);

    TupleMRBuilder cg = new TupleMRBuilder(conf, "Pangool WordCount");
    cg.addIntermediateSchema(schema);
    cg.setGroupByFields("word");
    cg.setJarByClass(PangoolWordCount.class);
    cg.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class), new Split());
    cg.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class, Text.class);
    cg.setTupleReducer(new Count());
    cg.setTupleCombiner(new CountCombiner());

    return cg.createJob();
}
From source file:com.datasalt.pangool.hive.TestPangoolHiveSerDe.java
License:Apache License
@Test
public void test() throws Exception {
    String INPUT = getClass().getCanonicalName() + "-input";
    String OUTPUT = getClass().getCanonicalName() + "-output";
    int NUM_ROWS = 50;

    Configuration hConf = new Configuration();
    FileSystem fs = FileSystem.get(hConf);
    fs.delete(new Path(INPUT), true);
    fs.delete(new Path(OUTPUT), true);

    Class.forName(driverName);
    Connection con;
    if (standAloneServer) {
        // get connection
        con = DriverManager.getConnection("jdbc:hive://localhost:10000/default", "", "");
    } else {
        con = DriverManager.getConnection("jdbc:hive://", "", "");
    }

    // Writing a file with tuples
    TupleFile.Writer writer = new TupleFile.Writer(fs, hConf, new Path(INPUT), schema);
    Random rand = new Random(1);
    Tuple tuple = new Tuple(schema);
    for (int i = 0; i < NUM_ROWS; i++) {
        writer.append(fillTuple(rand, tuple));
    }
    writer.close();

    Statement stmt = con.createStatement();
    try {
        stmt.executeQuery("drop table pangool_test");
    } catch (Exception e) {
        // Do nothing. Probably the table doesn't exist.
    }

    String create_table = "create external table TABLENAME ("
        + "cint int, clong bigint, cfloat float, cdouble double, cstring string, cboolean boolean, "
        + "cenum int, cbytes binary "
        + "ROW FORMAT SERDE 'com.datasalt.pangool.hive.PangoolHiveSerDe' "
        + "STORED AS INPUTFORMAT 'com.datasalt.pangool.hive.PangoolHiveInputFormat' "
        + "OUTPUTFORMAT 'com.datasalt.pangool.hive.PangoolHiveoOutputFormat' "
        + "TBLPROPERTIES ('schema.name'='myschema' ";
    String create_table1 = create_table.replace("TABLENAME", "table1") + "LOCATION '" + INPUT + "');";
    String create_table2 = create_table.replace("TABLENAME", "table2") + "LOCATION '" + OUTPUT + "');";

    stmt.executeQuery();
    stmt.executeQuery("load data inpath ");

    TupleFile.Reader readerSource = new TupleFile.Reader(fs, hConf, new Path(INPUT));
    TupleFile.Reader readerTarget = new TupleFile.Reader(fs, hConf, new Path(OUTPUT));
    ITuple sourceTuple = null;
    ITuple targetTuple = null;
    while (readerSource.next(sourceTuple)) {
        readerTarget.next(targetTuple);
        assertEqualTuples(sourceTuple, targetTuple);
    }
    assertFalse(readerTarget.next(tuple));
    readerSource.close();
    readerTarget.close();

    stmt.executeQuery("drop table pangool_test");
    stmt.close();
    con.close();

    fs.delete(new Path(INPUT), true);
    fs.delete(new Path(OUTPUT), true);
}
From source file:com.datasalt.pangool.tuplemr.mapred.TestCombiner.java
License:Apache License
public TupleMRBuilder getBuilder(Configuration conf, String input, String output)
        throws TupleMRException, IOException {
    FileSystem fs = FileSystem.get(conf);
    fs.delete(new Path(output), true);

    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.create("word", Type.STRING));
    fields.add(Field.create("count", Type.INT));

    TupleMRBuilder cg = new TupleMRBuilder(conf);
    cg.addIntermediateSchema(new Schema("schema", fields));
    cg.setJarByClass(TestCombiner.class);
    cg.addInput(new Path(input), new HadoopInputFormat(SequenceFileInputFormat.class), new Split());
    cg.setOutput(new Path(output), new HadoopOutputFormat(SequenceFileOutputFormat.class), Utf8.class,
        IntWritable.class);
    cg.setGroupByFields("word");
    cg.setOrderBy(new OrderBy().add("word", Order.ASC));
    cg.setTupleReducer(new Count());
    cg.setTupleCombiner(new CountCombiner());

    return cg;
}
From source file:com.datasalt.pangool.utils.DCUtils.java
License:Apache License
/**
 * Utility method for serializing an object and saving it in the Distributed Cache.
 * <p>
 * The file where it has been serialized will be saved into a Hadoop Configuration property so that you can call
 * {@link DCUtils#loadSerializedObjectInDC(Configuration, Class, String, boolean)} to re-instantiate the serialized
 * instance.
 *
 * @param obj The obj instance to serialize using Java serialization.
 * @param serializeToLocalFile The local file where the instance will be serialized. It will be copied to the HDFS and removed.
 * @param conf The Hadoop Configuration.
 * @throws FileNotFoundException
 * @throws IOException
 * @throws URISyntaxException
 */
public static void serializeToDC(Object obj, String serializeToLocalFile, Configuration conf)
        throws FileNotFoundException, IOException, URISyntaxException {

    File hadoopTmpDir = new File(conf.get("hadoop.tmp.dir"));
    if (!hadoopTmpDir.exists()) {
        hadoopTmpDir.mkdir();
    }
    File file = new File(hadoopTmpDir, serializeToLocalFile);
    FileSystem fS = FileSystem.get(conf);

    ObjectOutput out = new ObjectOutputStream(new FileOutputStream(file));
    out.writeObject(obj);
    out.close();

    if (fS.equals(FileSystem.getLocal(conf))) {
        return;
    }

    String tmpHdfsFolder = conf.get(HDFS_TMP_FOLDER_CONF);
    if (tmpHdfsFolder == null) {
        // Set the temporary folder for Pangool instances to the tmp folder of the user that is running the Job.
        // This folder will be used across the cluster for locating the instances, so tasktrackers
        // that are run as a different user will still be able to locate this folder.
        tmpHdfsFolder = conf.get("hadoop.tmp.dir");
        conf.set(HDFS_TMP_FOLDER_CONF, tmpHdfsFolder);
    }
    Path toHdfs = new Path(tmpHdfsFolder, serializeToLocalFile);
    if (fS.exists(toHdfs)) { // Delete any previous copy before copying to the DFS again
        fS.delete(toHdfs, false);
    }
    FileUtil.copy(FileSystem.getLocal(conf), new Path(file + ""), FileSystem.get(conf), toHdfs, true, conf);
    DistributedCache.addCacheFile(toHdfs.toUri(), conf);
}
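A hedged usage sketch of the round trip the javadoc above describes; the class name MyPredicate and the generic return of loadSerializedObjectInDC are illustrative assumptions, while the loadSerializedObjectInDC parameter list is the one given in the javadoc:

// Driver side: serialize the instance, copy it to HDFS and register it in the DistributedCache
Configuration conf = new Configuration();
MyPredicate predicate = new MyPredicate(); // any Serializable object (hypothetical class)
DCUtils.serializeToDC(predicate, "my-predicate.ser", conf);

// Task side: re-instantiate the serialized instance from the cache
MyPredicate restored =
    DCUtils.loadSerializedObjectInDC(conf, MyPredicate.class, "my-predicate.ser", true);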
From source file:com.datasalt.pangool.utils.HadoopUtils.java
License:Apache License
public static void deleteIfExists(FileSystem dFs, Path path) throws IOException {
    if (dFs.exists(path)) {
        dFs.delete(path, true);
    }
}
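A hypothetical call site (the path and configuration are illustrative): clearing a job's output directory before it is handed to FileOutputFormat.setOutputPath.

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
Path out = new Path("/user/examples/wordcount-out"); // illustrative path

// Removes the directory recursively if it is there; a no-op otherwise
HadoopUtils.deleteIfExists(fs, out);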
From source file:com.datasalt.pangool.utils.InstancesDistributor.java
License:Apache License
/**
 * Utility method for serializing an object and saving it in a way that later can be recovered anywhere in the
 * cluster.
 * <p>
 * The file where it has been serialized will be saved into a Hadoop Configuration property so that you can call
 * {@link InstancesDistributor#loadInstance(Configuration, Class, String, boolean)} to re-instantiate the serialized
 * instance.
 *
 * @param obj The obj instance to serialize using Java serialization.
 * @param fileName The file name where the instance will be serialized.
 * @param conf The Hadoop Configuration.
 * @throws FileNotFoundException
 * @throws IOException
 * @throws URISyntaxException
 */
public static void distribute(Object obj, String fileName, Configuration conf)
        throws FileNotFoundException, IOException, URISyntaxException {

    FileSystem fS = FileSystem.get(conf);

    // Set the temporary folder for Pangool instances to the tmp folder of the user that is running the Job.
    // This folder will be used across the cluster for locating the instances.
    // The default value can be changed by a user-provided one.
    String tmpHdfsFolder = conf.get(HDFS_TMP_FOLDER_CONF, DEFAULT_HDFS_TMP_FOLDER_CONF_VALUE);
    Path toHdfs = new Path(tmpHdfsFolder, fileName);
    if (fS.exists(toHdfs)) { // Delete any previous copy before writing it again
        fS.delete(toHdfs, false);
    }
    ObjectOutput out = new ObjectOutputStream(fS.create(toHdfs));
    out.writeObject(obj);
    out.close();
    DistributedCache.addCacheFile(toHdfs.toUri(), conf);
}
From source file:com.datasalt.pangool.utils.InstancesDistributor.java
License:Apache License
/**
 * Delete a file that has been distributed using {@link #distribute(Object, String, Configuration)}.
 */
public static void removeFromCache(Configuration conf, String filename) throws IOException {
    FileSystem fS = FileSystem.get(conf);
    fS.delete(locateFileInCache(conf, filename), true);
}