List of usage examples for org.apache.hadoop.fs FileSystem getFileStatus
/**
 * Returns the {@link FileStatus} describing the given path.
 *
 * @param f the path whose status is requested
 * @return the status object for {@code f}; the examples on this page read its length,
 *         modification time and directory flag
 * @throws IOException if the status cannot be obtained
 */
public abstract FileStatus getFileStatus(Path f) throws IOException;
From source file:com.hadoop.mapreduce.LzoTextInputFormat.java
License:Open Source License
@Override public List<InputSplit> getSplits(JobContext job) throws IOException { List<InputSplit> splits = super.getSplits(job); // find new start/ends of the filesplit that aligns // with the lzo blocks List<InputSplit> result = new ArrayList<InputSplit>(); FileSystem fs = FileSystem.get(job.getConfiguration()); for (InputSplit genericSplit : splits) { // load the index FileSplit fileSplit = (FileSplit) genericSplit; Path file = fileSplit.getPath(); LzoIndex index = indexes.get(file); if (index == null) { throw new IOException("Index not found for " + file); }/* w ww. j av a 2 s. c o m*/ if (index.isEmpty()) { // empty index, keep as is result.add(fileSplit); continue; } long start = fileSplit.getStart(); long end = start + fileSplit.getLength(); if (start != 0) { // find the next block position from // the start of the split long newStart = index.findNextPosition(start); if (newStart == -1 || newStart >= end) { // just skip this since it will be handled by another split continue; } start = newStart; } long newEnd = index.findNextPosition(end); if (newEnd != -1) { end = newEnd; } else { //didn't find the next position //we have hit the end of the file end = fs.getFileStatus(file).getLen(); } result.add(new FileSplit(file, start, end - start, fileSplit.getLocations())); } return result; }
From source file:com.hadoop.mapreduce.LzoTextInputFormat.java
License:Open Source License
/** * Read the index of the lzo file./* w w w.ja va 2 s. c o m*/ * * @param split * Read the index of this file. * @param fs * The index file is on this file system. * @throws IOException */ private LzoIndex readIndex(Path file, FileSystem fs) throws IOException { FSDataInputStream indexIn = null; try { Path indexFile = new Path(file.toString() + LZO_INDEX_SUFFIX); if (!fs.exists(indexFile)) { // return empty index, fall back to the unsplittable mode return new LzoIndex(); } long indexLen = fs.getFileStatus(indexFile).getLen(); int blocks = (int) (indexLen / 8); LzoIndex index = new LzoIndex(blocks); indexIn = fs.open(indexFile); for (int i = 0; i < blocks; i++) { index.set(i, indexIn.readLong()); } return index; } finally { if (indexIn != null) { indexIn.close(); } } }
From source file:com.hazelcast.yarn.YarnUtil.java
License:Open Source License
public static LocalResource createFileResource(Path file, FileSystem fs, LocalResourceType type) throws Exception { LocalResource resource = Records.newRecord(LocalResource.class); file = fs.makeQualified(file);//from w ww . j a v a 2s . c o m FileStatus stat = fs.getFileStatus(file); resource.setResource(ConverterUtils.getYarnUrlFromPath(file)); resource.setSize(stat.getLen()); resource.setTimestamp(stat.getModificationTime()); resource.setType(type); resource.setVisibility(LocalResourceVisibility.APPLICATION); return resource; }
From source file:com.hdfstoftp.main.HdfsToFtp.java
/** * ?//from w ww . ja va2 s . c o m * * @param srcFS * * @param src * ? * @param dst * * @param queryStr * * @param deleteSource * ?? * @param overwrite * ???? * @return boolean * @throws Exception */ private static boolean copyFromHDFSToFTP(Config config) throws Exception { // ?hdfs Configuration conf = new Configuration(); FileSystem srcFS = FileSystem.get(conf); long start = System.currentTimeMillis(); boolean isRename = config.isRenameUploaded(); int retryTimes = config.getRetryTimes(); // ? String dstPath = config.getDestDir(); Path src = new Path(config.getSouceDir()); FileStatus fileStatus = srcFS.getFileStatus(src); String subDir = null; if (fileStatus.isDirectory()) {// if (isRename) {// ??rename subDir = Config.RENAME_DIR; srcFS.mkdirs(new Path(fileStatus.getPath(), subDir)); } int threadNum = config.getThreadNum(); // ExecutorService threadPool = Executors.newFixedThreadPool(threadNum); // ?ftp FTPClientPool ftpPool = new FTPClientPool(threadNum, new FtpClientFactory(config.getFTPClientConfig())); FTPClient ftpClient = ftpPool.borrowObject(); // ? ftpClient.makeDirectory(dstPath); ftpPool.returnObject(ftpClient); // ?? FileStatus contents[] = srcFS.listStatus(src); long beginFilter = 0; long endFileter = 0; if (config.getCommandLine().hasOption("d") || config.getCommandLine().hasOption("h") || config.getCommandLine().hasOption("t")) {// ?"[" beginFilter = System.currentTimeMillis(); Long[] timeRange = parseTimeRange(config.getCommandLine()); contents = getNewContents(timeRange, contents); endFileter = System.currentTimeMillis(); } // ? if (config.getCommandLine().hasOption("r")) {// "["?? 
beginFilter = System.currentTimeMillis(); contents = getFilterContents(config.getCommandLine().getOptionValue("r").trim(), contents); endFileter = System.currentTimeMillis(); } logger.info("total file count:" + contents.length); Map<String, String> fileNameMap = null; long beginSkip = 0; long endSkip = 0; boolean overwrite = true; if (config.getCommandLine().hasOption("o")) { overwrite = "true".equals(config.getCommandLine().getOptionValue("o").trim()); } if (!overwrite) {// ????? beginSkip = System.currentTimeMillis(); fileNameMap = getFileNameMap(dstPath, ftpPool); endSkip = System.currentTimeMillis(); } int skiped = 0; List<Future<?>> futureList = new ArrayList<Future<?>>(); for (int i = 0; i < contents.length; i++) { if (!overwrite && fileNameMap.containsKey(contents[i].getPath().getName())) { // skiped++; Log.info("skiped filename:" + contents[i].getPath().getName()); continue; } if (contents[i].isDirectory()) { continue; } // ??? Future<?> future = threadPool.submit(new UploadFileTask(srcFS, contents[i].getPath(), new Path(dstPath, contents[i].getPath().getName()), ftpPool, false, isRename, subDir, retryTimes)); futureList.add(future); } int transfered = 0; int failed = 0; for (Future<?> future : futureList) { Boolean computeResult = (Boolean) future.get(); if (computeResult) { transfered++; if (transfered % 50 == 0 || transfered == contents.length) { logger.info("have transfered:" + transfered + " files"); } } else { failed++; logger.error("failed transter:" + failed + " files"); } } // threadPool.shutdown(); // FTPCient ftpPool.close(); // **************** logger.info("filter time:" + (endFileter - beginFilter) + " ms"); if (!overwrite) { logger.info("skip time:" + (endSkip - beginSkip) + " ms"); } logger.info("total file count:" + contents.length); logger.info("total transtered: " + transfered + ",total failed:" + failed + ",total skiped:" + skiped); } else {// BufferedReader reader = null; FtpClientFactory facotry = new 
FtpClientFactory(config.getFTPClientConfig()); FTPClient ftpClient = null; InputStream in = null; try { Path path = fileStatus.getPath(); if (!path.getName().contains("log")) { } reader = new BufferedReader(new FileReader(new File(path.toUri().getPath()))); String str = null; ftpClient = facotry.makeObject(); while ((str = reader.readLine()) != null) { String[] feilds = str.split("&"); Path filePath = null; if (feilds.length == 2 && feilds[1] != "") { filePath = new Path(feilds[1]); in = srcFS.open(filePath); boolean result = ftpClient.storeFile(dstPath, in); System.out.println(ftpClient.getReplyCode()); if (result) { logger.info(filePath.toString()); } else { logger_failed.info(filePath.toString()); } } else { continue; } } } catch (Exception e) { e.printStackTrace(); } finally { in.close(); reader.close(); facotry.destroyObject(ftpClient); } } long end = System.currentTimeMillis(); logger.info("finished transfer,total time:" + (end - start) / 1000 + "s"); return true; }
From source file:com.ibm.bi.dml.parser.DataExpression.java
License:Open Source License
/** * //w w w.j a v a 2 s. com * @param filename * @return * @throws LanguageException */ public JSONObject readMetadataFile(String filename, boolean conditional) throws LanguageException { JSONObject retVal = null; boolean exists = false; FileSystem fs = null; try { fs = FileSystem.get(ConfigurationManager.getCachedJobConf()); } catch (Exception e) { raiseValidateError("could not read the configuration file: " + e.getMessage(), false); } Path pt = new Path(filename); try { if (fs.exists(pt)) { exists = true; } } catch (Exception e) { exists = false; } boolean isDirBoolean = false; try { if (exists && fs.getFileStatus(pt).isDirectory()) isDirBoolean = true; else isDirBoolean = false; } catch (Exception e) { raiseValidateError( "error validing whether path " + pt.toString() + " is directory or not: " + e.getMessage(), conditional); } // CASE: filename is a directory -- process as a directory if (exists && isDirBoolean) { // read directory contents retVal = new JSONObject(); FileStatus[] stats = null; try { stats = fs.listStatus(pt); } catch (Exception e) { raiseValidateError("for MTD file in directory, error reading directory with MTD file " + pt.toString() + ": " + e.getMessage(), conditional); } for (FileStatus stat : stats) { Path childPath = stat.getPath(); // gives directory name if (childPath.getName().startsWith("part")) { BufferedReader br = null; try { br = new BufferedReader(new InputStreamReader(fs.open(childPath))); } catch (Exception e) { raiseValidateError("for MTD file in directory, error reading part of MTD file with path " + childPath.toString() + ": " + e.getMessage(), conditional); } JSONObject childObj = null; try { childObj = JSONHelper.parse(br); } catch (Exception e) { raiseValidateError("for MTD file in directory, error parsing part of MTD file with path " + childPath.toString() + ": " + e.getMessage(), conditional); } for (Object obj : childObj.entrySet()) { @SuppressWarnings("unchecked") Entry<Object, Object> e = (Entry<Object, Object>) 
obj; Object key = e.getKey(); Object val = e.getValue(); retVal.put(key, val); } } } // end for } // CASE: filename points to a file else if (exists) { BufferedReader br = null; // try reading MTD file try { br = new BufferedReader(new InputStreamReader(fs.open(pt))); } catch (Exception e) { raiseValidateError("error reading MTD file with path " + pt.toString() + ": " + e.getMessage(), conditional); } // try parsing MTD file try { retVal = JSONHelper.parse(br); } catch (Exception e) { raiseValidateError("error parsing MTD file with path " + pt.toString() + ": " + e.getMessage(), conditional); } } return retVal; }
From source file:com.ibm.bi.dml.parser.DataExpression.java
License:Open Source License
public String[] readMatrixMarketFile(String filename, boolean conditional) throws LanguageException { String[] retVal = new String[2]; retVal[0] = new String(""); retVal[1] = new String(""); boolean exists = false; try {// w w w . ja va2 s.c o m FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf()); Path pt = new Path(filename); if (fs.exists(pt)) { exists = true; } boolean getFileStatusIsDir = fs.getFileStatus(pt).isDirectory(); if (exists && getFileStatusIsDir) { raiseValidateError("MatrixMarket files as directories not supported", conditional); } else if (exists) { BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(pt))); try { retVal[0] = in.readLine(); // skip all commented lines do { retVal[1] = in.readLine(); } while (retVal[1].charAt(0) == '%'); if (!retVal[0].startsWith("%%")) { raiseValidateError("MatrixMarket files must begin with a header line.", conditional); } } finally { if (in != null) in.close(); } } else { raiseValidateError("Could not find the file: " + filename, conditional); } } catch (IOException e) { //LOG.error(this.printErrorLocation() + "Error reading MatrixMarket file: " + filename ); //throw new LanguageException(this.printErrorLocation() + "Error reading MatrixMarket file: " + filename ); throw new LanguageException(e); } return retVal; }
From source file:com.ibm.bi.dml.parser.DataExpression.java
License:Open Source License
public boolean checkHasMatrixMarketFormat(String inputFileName, String mtdFileName, boolean conditional) throws LanguageException { // Check the MTD file exists. if there is an MTD file, return false. JSONObject mtdObject = readMetadataFile(mtdFileName, conditional); if (mtdObject != null) return false; boolean exists = false; FileSystem fs = null; try {//from www .j a va 2 s . co m fs = FileSystem.get(ConfigurationManager.getCachedJobConf()); } catch (Exception e) { LOG.error(this.printErrorLocation() + "could not read the configuration file."); throw new LanguageException(this.printErrorLocation() + "could not read the configuration file.", e); } Path pt = new Path(inputFileName); try { if (fs.exists(pt)) { exists = true; } } catch (Exception e) { LOG.error(this.printErrorLocation() + "file " + inputFileName + " not found"); throw new LanguageException(this.printErrorLocation() + "file " + inputFileName + " not found"); } try { // CASE: filename is a directory -- process as a directory if (exists && fs.getFileStatus(pt).isDirectory()) { // currently, only MM files as files are supported. So, if file is directory, then infer // likely not MM file return false; } // CASE: filename points to a file else if (exists) { //BufferedReader in = new BufferedReader(new FileReader(filename)); BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(pt))); String headerLine = new String(""); if (in.ready()) headerLine = in.readLine(); in.close(); // check that headerline starts with "%%" // will infer malformed if (headerLine != null && headerLine.startsWith("%%")) return true; else return false; } else { return false; } } catch (Exception e) { return false; } }
From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.RemoteParForColocatedFileSplit.java
License:Open Source License
/** * Get the list of hostnames where the input split is located. *//*from w w w. j ava 2 s. c o m*/ @Override public String[] getLocations() throws IOException { //Timing time = new Timing(); //time.start(); JobConf job = new JobConf(ConfigurationManager.getCachedJobConf()); FileSystem fs = FileSystem.get(job); //read task string LongWritable key = new LongWritable(); Text value = new Text(); RecordReader<LongWritable, Text> reader = new NLineInputFormat().getRecordReader(this, job, Reporter.NULL); reader.next(key, value); reader.close(); //parse task Task t = Task.parseCompactString(value.toString()); //get all locations HashMap<String, Integer> hosts = new HashMap<String, Integer>(); if (t.getType() == TaskType.SET) { for (IntObject val : t.getIterations()) { String fname = _fname + "/" + String.valueOf(((val.getLongValue() - 1) / _blen + 1)); FileStatus status = fs.getFileStatus(new Path(fname)); BlockLocation[] tmp1 = fs.getFileBlockLocations(status, 0, status.getLen()); for (BlockLocation bl : tmp1) countHosts(hosts, bl.getHosts()); } } else //TaskType.RANGE { //since this is a serial process, we use just the first iteration //as a heuristic for location information long lFrom = t.getIterations().get(0).getLongValue(); long lTo = t.getIterations().get(1).getLongValue(); for (long li : new long[] { lFrom, lTo }) { String fname = _fname + "/" + String.valueOf(((li - 1) / _blen + 1)); FileStatus status = fs.getFileStatus(new Path(fname)); BlockLocation[] tmp1 = fs.getFileBlockLocations(status, 0, status.getLen()); for (BlockLocation bl : tmp1) countHosts(hosts, bl.getHosts()); } /* int lFrom = t.getIterations().get(0).getIntValue(); int lTo = t.getIterations().get(1).getIntValue(); int lIncr = t.getIterations().get(2).getIntValue(); for( int i=lFrom; i<=lTo; i+=lIncr ) { String fname = _fname+"/"+String.valueOf( ((i-_offset)/_blen+_offset) ); FileSystem fs = FileSystem.get(job); FileStatus status = fs.getFileStatus(new Path(fname)); BlockLocation[] tmp1 = 
fs.getFileBlockLocations(status, 0, status.getLen()); for( BlockLocation bl : tmp1 ) countHosts(hosts, bl.getHosts()); }*/ } //System.out.println("Get locations "+time.stop()+""); //majority consensus on top host return getTopHosts(hosts); }
From source file:com.ibm.bi.dml.runtime.io.WriterMatrixMarket.java
License:Open Source License
/** * //from w w w.j av a2s . c o m * @param srcFileName * @param fileName * @param rlen * @param clen * @param nnz * @throws IOException */ public void mergeTextcellToMatrixMarket(String srcFileName, String fileName, long rlen, long clen, long nnz) throws IOException { Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf()); Path src = new Path(srcFileName); Path merge = new Path(fileName); FileSystem hdfs = FileSystem.get(conf); if (hdfs.exists(merge)) { hdfs.delete(merge, true); } OutputStream out = hdfs.create(merge, true); // write out the header first StringBuilder sb = new StringBuilder(); sb.append("%%MatrixMarket matrix coordinate real general\n"); // output number of rows, number of columns and number of nnz sb.append(rlen + " " + clen + " " + nnz + "\n"); out.write(sb.toString().getBytes()); // if the source is a directory if (hdfs.getFileStatus(src).isDirectory()) { try { FileStatus[] contents = hdfs.listStatus(src); for (int i = 0; i < contents.length; i++) { if (!contents[i].isDirectory()) { InputStream in = hdfs.open(contents[i].getPath()); try { IOUtils.copyBytes(in, out, conf, false); } finally { IOUtilFunctions.closeSilently(in); } } } } finally { IOUtilFunctions.closeSilently(out); } } else if (hdfs.isFile(src)) { InputStream in = null; try { in = hdfs.open(src); IOUtils.copyBytes(in, out, conf, true); } finally { IOUtilFunctions.closeSilently(in); IOUtilFunctions.closeSilently(out); } } else { throw new IOException(src.toString() + ": No such file or directory"); } }
From source file:com.ibm.bi.dml.runtime.matrix.data.MultipleOutputCommitter.java
License:Open Source License
/** * /*from ww w . ja v a 2 s .c o m*/ * @param context * @param fs * @param taskOutput * @throws IOException */ private void moveFinalTaskOutputs(TaskAttemptContext context, FileSystem fs, Path taskOutput) throws IOException { context.getProgressible().progress(); if (fs.getFileStatus(taskOutput).isDirectory()) { FileStatus[] files = fs.listStatus(taskOutput); if (files != null) for (FileStatus file : files) //for all files if (!file.isDirectory()) //skip directories moveFileToDestination(context, fs, file.getPath()); } }