List of usage examples for the org.apache.hadoop.conf.Configuration constructor:
public Configuration()
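Before the project-specific examples below, a minimal, self-contained sketch of what the no-argument constructor does (the class name, the overridden value, and the printed keys are illustrative and not taken from any of the projects listed): new Configuration() loads the default resources core-default.xml and core-site.xml from the classpath; properties can then be set or read, and the instance is handed to other Hadoop components such as FileSystem.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class ConfigurationUsageSketch {

  public static void main(String[] args) throws IOException {
    // Loads core-default.xml and core-site.xml from the classpath as default resources.
    Configuration conf = new Configuration();

    // Programmatic overrides take precedence over values from the loaded resource files.
    // "fs.defaultFS" is a standard Hadoop key; the local file system is used here so the
    // sketch runs without a cluster.
    conf.set("fs.defaultFS", "file:///");

    // The Configuration is passed to other Hadoop components, e.g. to obtain a FileSystem.
    FileSystem fs = FileSystem.get(conf);
    System.out.println("Default file system: " + fs.getUri());

    // Typed getters fall back to the supplied default when a key is unset.
    int reducers = conf.getInt("mapreduce.job.reduces", 1);
    System.out.println("Configured reduce tasks: " + reducers);
  }
}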
From source file: be.uantwerpen.adrem.bigfim.BigFIMDriver.java
License: Apache License
private void startCreatePrefixGroups(FIMOptions opt, int phase)
    throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
  Path path = new Path(opt.outputDir + separator + "tg" + phase);
  for (FileStatus status : path.getFileSystem(new Configuration()).listStatus(path)) {
    String cacheFile = status.getPath().toString();
    String trieGroupCount = cacheFile.substring(cacheFile.lastIndexOf('/'), cacheFile.length());
    trieGroupCount = trieGroupCount.split("-")[1];
    String outputFile = opt.outputDir + separator + "pg-trieGroup" + trieGroupCount;
    System.out.println("[CreatePrefixGroups]: input: " + opt.inputFile + ", output: " + opt.outputDir
        + ", cache: " + cacheFile);

    Job job = prepareJob(new Path(opt.inputFile), new Path(outputFile), SplitByKTextInputFormat.class,
        ComputeTidListMapper.class, Text.class, IntArrayWritable.class, ComputeTidListReducer.class,
        IntArrayWritable.class, IntMatrixWritable.class, SequenceFileOutputFormat.class);

    job.setJobName("Create Prefix Groups");
    job.setJarByClass(BigFIMDriver.class);
    job.setNumReduceTasks(1);

    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);
    conf.setInt(PREFIX_LENGTH_KEY, phase);

    addCacheFile(new URI(cacheFile.replace(" ", "%20")), job.getConfiguration());

    runJob(job, "Prefix Creation");
  }
}
From source file: be.uantwerpen.adrem.bigfim.ComputeTidListReducer.java
License: Apache License
private void getPgStartIndex(Configuration conf) {
  try {
    Path path = new Path(basePGDir);
    FileSystem fs = path.getFileSystem(new Configuration());
    if (!fs.exists(path)) {
      pgStartIndex = 0;
      return;
    }
    int largestIx = 0;
    for (FileStatus file : fs.listStatus(path)) {
      String tmp = file.getPath().toString();
      if (!tmp.contains("bucket")) {
        continue;
      }
      tmp = tmp.substring(tmp.lastIndexOf('/'), tmp.length());
      int ix = Integer.parseInt(tmp.split("-")[1]);
      largestIx = Math.max(largestIx, ix);
      pgStartIndex += 1;
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
}
From source file: be.uantwerpen.adrem.bigfim.ComputeTidListReducerTest.java
License: Apache License
private Configuration createConfiguration() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(MIN_SUP_KEY, 1);
  conf.setInt(NUMBER_OF_MAPPERS_KEY, 2);
  conf.setInt(SUBDB_SIZE, 10);
  conf.setStrings(OUTPUT_DIR_KEY, "file:///out");
  return conf;
}
From source file: be.uantwerpen.adrem.eclat.util.SplitByKTextInputFormatTest.java
License: Apache License
private Configuration createConfiguration(int... numberOfLines) {
  Configuration conf = new Configuration();
  if (numberOfLines.length > 0) {
    conf.setLong(NUMBER_OF_LINES_KEY, numberOfLines[0]);
  }
  conf.set("fs.default.name", "file:///");
  conf.setBoolean("fs.file.impl.disable.cache", false);
  conf.setClass("fs.file.impl", RawLocalFileSystem.class, FileSystem.class);
  return conf;
}
From source file: be.uantwerpen.adrem.hadoop.util.Tools.java
License: Apache License
/**
 * Cleans the Hadoop file system by deleting the specified files if they exist.
 *
 * @param files
 *          the files to delete
 */
public static void cleanDirs(String... files) {
  System.out.println("[Cleaning]: Cleaning HDFS");
  Configuration conf = new Configuration();
  for (String filename : files) {
    System.out.println("[Cleaning]: Trying to delete " + filename);
    Path path = new Path(filename);
    try {
      FileSystem fs = path.getFileSystem(conf);
      if (fs.exists(path)) {
        if (fs.delete(path, true)) {
          System.out.println("[Cleaning]: Deleted " + filename);
        } else {
          System.out.println("[Cleaning]: Error while deleting " + filename);
        }
      } else {
        System.out.println("[Cleaning]: " + filename + " does not exist on HDFS");
      }
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
}
From source file: be.uantwerpen.adrem.hadoop.util.Tools.java
License: Apache License
@SuppressWarnings("rawtypes")
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
    Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey, Class<? extends Writable> mapperValue,
    Class<? extends Reducer> reducer, Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
    Class<? extends OutputFormat> outputFormat) throws IOException {
  Job job = new Job(new Configuration());
  Configuration jobConf = job.getConfiguration();
  if (reducer.equals(Reducer.class)) {
    if (mapper.equals(Mapper.class)) {
      throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    }
    job.setJarByClass(mapper);
  } else {
    job.setJarByClass(reducer);
  }
  job.setInputFormatClass(inputFormat);
  jobConf.set("mapred.input.dir", inputPath.toString());
  job.setMapperClass(mapper);
  if (mapperKey != null) {
    job.setMapOutputKeyClass(mapperKey);
  }
  if (mapperValue != null) {
    job.setMapOutputValueClass(mapperValue);
  }
  jobConf.setBoolean("mapred.compress.map.output", true);
  job.setReducerClass(reducer);
  job.setOutputKeyClass(reducerKey);
  job.setOutputValueClass(reducerValue);
  job.setOutputFormatClass(outputFormat);
  jobConf.set("mapred.output.dir", outputPath.toString());
  return job;
}
From source file: be.uantwerpen.adrem.hadoop.util.Tools.java
License: Apache License
public static void cleanupSubdirsExcept(String dir, Collection<String> toKeep) {
  Path path = new Path(dir);
  try {
    for (FileStatus fs : path.getFileSystem(new Configuration()).listStatus(path)) {
      String[] sp = fs.getPath().toString().split(Path.SEPARATOR);
      String filename = sp[sp.length - 1];
      if (toKeep.contains(filename)) {
        cleanDirs(fs.getPath().toString() + Path.SEPARATOR + "_SUCCESS");
        continue;
      }
      cleanDirs(fs.getPath().toString());
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
}
From source file: be.ugent.intec.halvade.Halvade.java
License: Open Source License
public static void runMapReduce(String[] args) {
  int res = 0;
  try {
    Configuration c = new Configuration();
    MapReduceRunner runner = new MapReduceRunner();
    res = ToolRunner.run(c, runner, args);
  } catch (Exception ex) {
    Logger.EXCEPTION(ex);
  }
  System.exit(res);
}
From source file: be.ugent.intec.halvade.uploader.HalvadeUploader.java
License: Open Source License
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws Exception {
  // TODO code application logic here
  Configuration c = new Configuration();
  HalvadeUploader hau = new HalvadeUploader();
  int res = ToolRunner.run(c, hau, args);
}
From source file: be.ugent.intec.halvade.uploader.input.BaseFileReader.java
protected static BufferedReader getReader(boolean readFromDistributedStorage, String file)
    throws FileNotFoundException, IOException {
  InputStream hdfsIn;
  if (readFromDistributedStorage) {
    Path pt = new Path(file);
    FileSystem fs = FileSystem.get(pt.toUri(), new Configuration());
    hdfsIn = fs.open(pt);
    // read the stream in the correct format!
    if (file.endsWith(".gz")) {
      GZIPInputStream gzip = new GZIPInputStream(hdfsIn, BUFFERSIZE);
      return new BufferedReader(new InputStreamReader(gzip));
    } else if (file.endsWith(".bz2")) {
      CBZip2InputStream bzip2 = new CBZip2InputStream(hdfsIn);
      return new BufferedReader(new InputStreamReader(bzip2));
    } else {
      return new BufferedReader(new InputStreamReader(hdfsIn));
    }
  } else {
    if (file.endsWith(".gz")) {
      GZIPInputStream gzip = new GZIPInputStream(new FileInputStream(file), BUFFERSIZE);
      return new BufferedReader(new InputStreamReader(gzip));
    } else if (file.endsWith(".bz2")) {
      CBZip2InputStream bzip2 = new CBZip2InputStream(new FileInputStream(file));
      return new BufferedReader(new InputStreamReader(bzip2));
    } else if (file.equals("-")) {
      return new BufferedReader(new InputStreamReader(System.in));
    } else {
      return new BufferedReader(new FileReader(file));
    }
  }
}