List of usage examples for org.apache.hadoop.fs.FileSystem.create
public FSDataOutputStream create(Path f) throws IOException
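Before the source-file examples below, here is a minimal, self-contained sketch of the pattern they all share: obtain a FileSystem from a Configuration, call create(Path) to get an FSDataOutputStream, write to it, and close it in a finally block. The path /tmp/example.txt and the written text are arbitrary illustration values, not taken from any of the source files below.

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemCreateExample {

    public static void main(String[] args) throws IOException {
        // Picks up core-site.xml / hdfs-site.xml from the classpath;
        // falls back to the local file system if none is configured.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Arbitrary illustration path; create(Path) overwrites an existing
        // file by default and creates missing parent directories.
        Path path = new Path("/tmp/example.txt");

        FSDataOutputStream out = fs.create(path);
        try {
            out.write("hello hdfs".getBytes(StandardCharsets.UTF_8));
        } finally {
            out.close();
        }
    }
}

The examples that follow use this same call, differing mainly in where the FileSystem comes from (FileSystem.get(conf) or a helper such as HadoopUtil/HadoopManager) and in how data is copied into the stream (IOUtils.copy, StreamUtil.transfer, or direct writes).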
From source file:com.kylinolap.dict.DictionaryManager.java
License:Apache License
private String unpackDataSet(String tempHDFSDir, String dataSetName) throws IOException {

    InputStream in = this.getClass().getResourceAsStream("/com/kylinolap/dict/" + dataSetName + ".txt");
    if (in == null) // data set resource not found
        return null;

    ByteArrayOutputStream buf = new ByteArrayOutputStream();
    IOUtils.copy(in, buf);
    in.close();
    byte[] bytes = buf.toByteArray();

    Path tmpDataSetPath = new Path(
            tempHDFSDir + "/dict/temp_dataset/" + dataSetName + "_" + bytes.length + ".txt");

    FileSystem fs = HadoopUtil.getFileSystem(tempHDFSDir);
    boolean writtenNewFile = false;
    if (!fs.exists(tmpDataSetPath) || fs.getFileStatus(tmpDataSetPath).getLen() != bytes.length) {
        fs.mkdirs(tmpDataSetPath.getParent());
        FSDataOutputStream out = fs.create(tmpDataSetPath);
        IOUtils.copy(new ByteArrayInputStream(bytes), out);
        out.close();
        writtenNewFile = true;
    }

    String qualifiedPath = tmpDataSetPath.makeQualified(fs.getUri(), new Path("/")).toString();
    if (writtenNewFile)
        logger.info("Dictionary temp data set file written to " + qualifiedPath);
    return qualifiedPath;
}
From source file:com.kylinolap.job.hadoop.cube.FactDistinctColumnsReducer.java
License:Apache License
@Override
public void reduce(ShortWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    TblColRef col = columnList.get(key.get());

    HashSet<ByteArray> set = new HashSet<ByteArray>();
    for (Text textValue : values) {
        ByteArray value = new ByteArray(Bytes.copy(textValue.getBytes(), 0, textValue.getLength()));
        set.add(value);
    }

    Configuration conf = context.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    String outputPath = conf.get(BatchConstants.OUTPUT_PATH);
    FSDataOutputStream out = fs.create(new Path(outputPath, col.getName()));

    try {
        for (ByteArray value : set) {
            out.write(value.data);
            out.write('\n');
        }
    } finally {
        out.close();
    }
}
From source file:com.kylinolap.job.hadoop.invertedindex.IIDistinctColumnsReducer.java
License:Apache License
@Override
public void reduce(ShortWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    String columnName = columns[key.get()];

    HashSet<ByteArray> set = new HashSet<ByteArray>();
    for (Text textValue : values) {
        ByteArray value = new ByteArray(Bytes.copy(textValue.getBytes(), 0, textValue.getLength()));
        set.add(value);
    }

    Configuration conf = context.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    String outputPath = conf.get(BatchConstants.OUTPUT_PATH);
    FSDataOutputStream out = fs.create(new Path(outputPath, columnName));

    try {
        for (ByteArray value : set) {
            out.write(value.data);
            out.write('\n');
        }
    } finally {
        out.close();
    }
}
From source file:com.kylinolap.job.tools.DeployCoprocessorCLI.java
License:Apache License
public static Path uploadCoprocessorJar(String localCoprocessorJar, FileSystem fileSystem,
        Set<String> oldJarPaths) throws IOException {
    Path uploadPath = null;
    File localCoprocessorFile = new File(localCoprocessorJar);

    // check existing jars
    if (oldJarPaths == null) {
        oldJarPaths = new HashSet<String>();
    }
    Path coprocessorDir = getCoprocessorHDFSDir(fileSystem, KylinConfig.getInstanceFromEnv());
    for (FileStatus fileStatus : fileSystem.listStatus(coprocessorDir)) {
        // compare against the local jar file's length, not the path string's length
        if (fileStatus.getLen() == localCoprocessorFile.length()
                && fileStatus.getModificationTime() == localCoprocessorFile.lastModified()) {
            uploadPath = fileStatus.getPath();
            break;
        }
        String filename = fileStatus.getPath().toString();
        if (filename.endsWith(".jar")) {
            oldJarPaths.add(filename);
        }
    }

    // upload if not existing
    if (uploadPath == null) {
        // figure out a unique new jar file name
        Set<String> oldJarNames = new HashSet<String>();
        for (String path : oldJarPaths) {
            oldJarNames.add(new Path(path).getName());
        }
        String baseName = getBaseFileName(localCoprocessorJar);
        String newName = null;
        int i = 0;
        while (newName == null) {
            newName = baseName + "-" + (i++) + ".jar";
            if (oldJarNames.contains(newName))
                newName = null;
        }

        // upload
        uploadPath = new Path(coprocessorDir, newName);
        FileInputStream in = null;
        FSDataOutputStream out = null;
        try {
            in = new FileInputStream(localCoprocessorFile);
            out = fileSystem.create(uploadPath);
            IOUtils.copy(in, out);
        } finally {
            IOUtils.closeQuietly(in);
            IOUtils.closeQuietly(out);
        }

        fileSystem.setTimes(uploadPath, localCoprocessorFile.lastModified(), System.currentTimeMillis());
    }

    uploadPath = uploadPath.makeQualified(fileSystem.getUri(), null);
    return uploadPath;
}
From source file:com.liferay.hadoop.action.HadoopJob.java
License:Open Source License
public String doExecute(HttpServletRequest request, HttpServletResponse response) throws Exception {
    response.setContentType(ContentTypes.TEXT_PLAIN_UTF8);

    PrintWriter writer = response.getWriter();

    FileSystem fileSystem = HadoopManager.getFileSystem();
    JobClient jobClient = HadoopManager.getJobClient();

    writer.println("-- Job Status --");

    Path inputPath = new Path("/index/*/*");
    Path outputPath = new Path("/wordcount/results");

    try {
        if (_runningJob == null) {
            writer.println("Creating job");

            if (fileSystem.exists(_jobPath)) {
                fileSystem.delete(_jobPath, false);
            }

            if (!fileSystem.exists(_jobPath)) {
                writer.println("Deploying the job code to cluster");

                FSDataOutputStream outputStream = null;

                try {
                    outputStream = fileSystem.create(_jobPath);

                    ServletContext servletContext = HadoopManager.getServletContext();

                    InputStream inputStream = servletContext.getResourceAsStream("/WEB-INF/lib/hadoop-job.jar");

                    StreamUtil.transfer(inputStream, outputStream, false);
                } finally {
                    StreamUtil.cleanUp(outputStream);
                }

                writer.println("Job code deployed to cluster");
            }

            if (fileSystem.exists(outputPath)) {
                writer.println("A previous job output was found, backing it up");

                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            _jobConf = HadoopManager.createNewJobConf();

            _jobConf.setJobName("Word Count");

            writer.println("Job '" + _jobConf.getJobName() + "' is being configured");

            _jobConf.setJarByClass(Map.class);
            _jobConf.setOutputKeyClass(Text.class);
            _jobConf.setOutputValueClass(IntWritable.class);

            _jobConf.setMapperClass(Map.class);
            _jobConf.setCombinerClass(Reduce.class);
            _jobConf.setReducerClass(Reduce.class);

            _jobConf.setInputFormat(TextInputFormat.class);
            _jobConf.setOutputFormat(TextOutputFormat.class);

            writer.println("Job code deployed to distributed cache's classpath");

            DistributedCache.addArchiveToClassPath(_jobPath, _jobConf, fileSystem);

            FileInputFormat.setInputPaths(_jobConf, inputPath);
            FileOutputFormat.setOutputPath(_jobConf, outputPath);

            writer.println("Submitting job the first time");

            _runningJob = jobClient.submitJob(_jobConf);

            writer.println("Job submitted");
        }

        int jobState = _runningJob.getJobState();

        writer.println(
                "Job status: " + jobState + " (RUNNING = 1, SUCCEEDED = 2, FAILED = 3, PREP = 4, KILLED = 5)");

        if ((jobState != JobStatus.RUNNING) && (jobState != JobStatus.PREP)) {
            writer.println("Re-issuing the job");

            if (fileSystem.exists(outputPath)) {
                writer.println("A previous job output was found, backing it up");

                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            writer.println("Submitting job the first time");

            _runningJob = jobClient.submitJob(_jobConf);

            writer.println("Job submitted");
        }
    } catch (Exception ioe) {
        writer.println("Job error: ");

        ioe.printStackTrace(writer);
    }

    writer.flush();
    writer.close();

    return null;
}
From source file:com.liferay.hadoop.search.HadoopDLIndexerPostProcessor.java
License:Open Source License
public void postProcessDocument(Document document, Object obj) throws Exception {
    DLFileEntry dlFileEntry = (DLFileEntry) obj;

    long companyId = dlFileEntry.getCompanyId();
    long repositoryId = dlFileEntry.getRepositoryId();

    String stringObject = document.toString();

    // remove JSON chars
    stringObject = StringUtil.replace(stringObject, new String[] { "\"", ",", ":", "{", "}", "[", "]" },
            new String[] { StringPool.SPACE, StringPool.SPACE, StringPool.SPACE, StringPool.SPACE,
                    StringPool.SPACE, StringPool.SPACE, StringPool.SPACE });

    Path fullDirPath = HadoopManager.getFullDirPath(companyId, repositoryId, null);

    fullDirPath = new Path("/index".concat(fullDirPath.toString()));

    FSDataOutputStream outputStream = null;

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        String suffix = StringPool.SLASH.concat(document.getUID());

        outputStream = fileSystem.create(fullDirPath.suffix(suffix));

        PrintWriter pw = new PrintWriter(outputStream);

        pw.write(stringObject);

        pw.flush();
        pw.close();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        StreamUtil.cleanUp(outputStream);
    }
}
From source file:com.liferay.hadoop.store.HDFSStore.java
License:Open Source License
@Override
public void addFile(long companyId, long repositoryId, String fileName, InputStream is)
        throws PortalException, SystemException {
    Path fullPath = HadoopManager.getFullVersionFilePath(companyId, repositoryId, fileName, VERSION_DEFAULT);

    FSDataOutputStream outputStream = null;

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        outputStream = fileSystem.create(fullPath);

        StreamUtil.transfer(is, outputStream, false);
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    } finally {
        StreamUtil.cleanUp(outputStream);
    }
}
From source file:com.liferay.hadoop.store.HDFSStore.java
License:Open Source License
@Override
public void updateFile(long companyId, long repositoryId, String fileName, String versionLabel,
        InputStream inputStream) throws PortalException, SystemException {
    Path fullPath = HadoopManager.getFullVersionFilePath(companyId, repositoryId, fileName, versionLabel);

    FSDataOutputStream outputStream = null;

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        outputStream = fileSystem.create(fullPath);

        StreamUtil.transfer(inputStream, outputStream, false);
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    } finally {
        StreamUtil.cleanUp(outputStream);
    }
}
From source file:com.liferay.hadoop.util.HadoopManager.java
License:Open Source License
public static void runJob(StoreEvent storeEvent) throws IOException {
    FileSystem fileSystem = getFileSystem();

    if (_servletContext == null) {
        return;
    }

    JobClient jobClient = getJobClient();

    Path inputPath = new Path("/index".concat(storeEvent.getRootPath().toString()).concat("/*"));
    Path outputPath = new Path("/wordcount".concat(storeEvent.getRootPath().toString()).concat("/results"));

    try {
        if (_runningJob == null) {
            if (!fileSystem.exists(_jobPath)) {
                FSDataOutputStream outputStream = null;

                try {
                    outputStream = fileSystem.create(_jobPath);

                    InputStream inputStream = _servletContext
                            .getResourceAsStream("/WEB-INF/lib/hadoop-job.jar");

                    StreamUtil.transfer(inputStream, outputStream, false);
                } finally {
                    StreamUtil.cleanUp(outputStream);
                }
            }

            if (fileSystem.exists(outputPath)) {
                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            _jobConf = new JobConf(_sharedJobConf);

            _jobConf.setJobName("Word Count");

            _jobConf.setJarByClass(Map.class);
            _jobConf.setOutputKeyClass(Text.class);
            _jobConf.setOutputValueClass(IntWritable.class);

            _jobConf.setMapperClass(Map.class);
            _jobConf.setCombinerClass(Reduce.class);
            _jobConf.setReducerClass(Reduce.class);

            _jobConf.setInputFormat(TextInputFormat.class);
            _jobConf.setOutputFormat(TextOutputFormat.class);

            DistributedCache.addArchiveToClassPath(_jobPath, _jobConf, fileSystem);

            FileInputFormat.setInputPaths(_jobConf, inputPath);
            FileOutputFormat.setOutputPath(_jobConf, outputPath);

            _runningJob = jobClient.submitJob(_jobConf);
        }

        int jobState = _runningJob.getJobState();

        if ((jobState != JobStatus.RUNNING) && (jobState != JobStatus.PREP)) {
            System.out.println("Re-issuing the word count job.");

            if (fileSystem.exists(outputPath)) {
                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            _runningJob = jobClient.submitJob(_jobConf);
        }
    } catch (Exception ioe) {
        ioe.printStackTrace();
    }
}
From source file:com.linkedin.cubert.io.avro.AvroTeeWriter.java
License:Open Source License
@Override
public void open(Configuration conf, JsonNode json, BlockSchema schema, Path root, String filename)
        throws IOException {
    Path teePath = new Path(root, filename + ".avro");
    FileSystem fs = FileSystem.get(conf);

    Schema avroSchema = AvroUtils.convertFromBlockSchema("record", schema);
    record = new Record(avroSchema);
    numColumns = schema.getNumColumns();

    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(avroSchema);
    dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);

    dataFileWriter.create(avroSchema, fs.create(teePath));
}