Example usage for org.apache.hadoop.fs FileSystem getFileStatus

List of usage examples for org.apache.hadoop.fs FileSystem getFileStatus

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileSystem getFileStatus.

Prototype

public abstract FileStatus getFileStatus(Path f) throws IOException;

Document

Return a file status object that represents the path.
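
Before the full usage examples below, here is a minimal, self-contained sketch of calling getFileStatus directly. This is only an illustration: a default Configuration is assumed and the path /tmp/example.txt is a placeholder; note that getFileStatus throws a FileNotFoundException when the path does not exist.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // placeholder path; replace with a path that exists on your file system
        Path path = new Path("/tmp/example.txt");

        // getFileStatus returns a FileStatus describing the path
        FileStatus status = fs.getFileStatus(path);
        System.out.println("length: " + status.getLen());
        System.out.println("isDirectory: " + status.isDirectory());
        System.out.println("modification time: " + status.getModificationTime());
    }
}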

Usage

From source file:com.hadoop.mapreduce.LzoTextInputFormat.java

License:Open Source License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = super.getSplits(job);
    // find new start/ends of the filesplit that aligns
    // with the lzo blocks

    List<InputSplit> result = new ArrayList<InputSplit>();
    FileSystem fs = FileSystem.get(job.getConfiguration());

    for (InputSplit genericSplit : splits) {
        // load the index
        FileSplit fileSplit = (FileSplit) genericSplit;
        Path file = fileSplit.getPath();
        LzoIndex index = indexes.get(file);
        if (index == null) {
            throw new IOException("Index not found for " + file);
        }

        if (index.isEmpty()) {
            // empty index, keep as is
            result.add(fileSplit);
            continue;
        }

        long start = fileSplit.getStart();
        long end = start + fileSplit.getLength();

        if (start != 0) {
            // find the next block position from
            // the start of the split
            long newStart = index.findNextPosition(start);
            if (newStart == -1 || newStart >= end) {
                // just skip this since it will be handled by another split
                continue;
            }
            start = newStart;
        }

        long newEnd = index.findNextPosition(end);
        if (newEnd != -1) {
            end = newEnd;
        } else {
            //didn't find the next position
            //we have hit the end of the file
            end = fs.getFileStatus(file).getLen();
        }

        result.add(new FileSplit(file, start, end - start, fileSplit.getLocations()));
    }

    return result;
}

From source file:com.hadoop.mapreduce.LzoTextInputFormat.java

License:Open Source License

/**
 * Read the index of the lzo file.
 * 
 * @param file
 *          The file whose lzo index should be read.
 * @param fs
 *          The file system on which the index file resides.
 * @throws IOException
 */
private LzoIndex readIndex(Path file, FileSystem fs) throws IOException {
    FSDataInputStream indexIn = null;
    try {
        Path indexFile = new Path(file.toString() + LZO_INDEX_SUFFIX);
        if (!fs.exists(indexFile)) {
            // return empty index, fall back to the unsplittable mode
            return new LzoIndex();
        }

        long indexLen = fs.getFileStatus(indexFile).getLen();
        int blocks = (int) (indexLen / 8);
        LzoIndex index = new LzoIndex(blocks);
        indexIn = fs.open(indexFile);
        for (int i = 0; i < blocks; i++) {
            index.set(i, indexIn.readLong());
        }
        return index;
    } finally {
        if (indexIn != null) {
            indexIn.close();
        }
    }
}

From source file:com.hazelcast.yarn.YarnUtil.java

License:Open Source License

public static LocalResource createFileResource(Path file, FileSystem fs, LocalResourceType type)
        throws Exception {
    LocalResource resource = Records.newRecord(LocalResource.class);

    file = fs.makeQualified(file);
    FileStatus stat = fs.getFileStatus(file);
    resource.setResource(ConverterUtils.getYarnUrlFromPath(file));
    resource.setSize(stat.getLen());
    resource.setTimestamp(stat.getModificationTime());
    resource.setType(type);
    resource.setVisibility(LocalResourceVisibility.APPLICATION);
    return resource;
}

From source file:com.hdfstoftp.main.HdfsToFtp.java

/**
 * Copy files from HDFS to an FTP server according to the given configuration.
 * 
 * @param config
 *            holds the source and destination directories, filter options and the FTP connection settings
 * @return boolean
 * @throws Exception
 */
private static boolean copyFromHDFSToFTP(Config config) throws Exception {
    // obtain the source HDFS file system
    Configuration conf = new Configuration();
    FileSystem srcFS = FileSystem.get(conf);
    long start = System.currentTimeMillis();
    boolean isRename = config.isRenameUploaded();
    int retryTimes = config.getRetryTimes();
    // resolve the source and destination paths
    String dstPath = config.getDestDir();
    Path src = new Path(config.getSouceDir());
    FileStatus fileStatus = srcFS.getFileStatus(src);
    String subDir = null;
    if (fileStatus.isDirectory()) { // source is a directory
        if (isRename) { // create a sub-directory used to move uploaded files into
            subDir = Config.RENAME_DIR;
            srcFS.mkdirs(new Path(fileStatus.getPath(), subDir));
        }
        int threadNum = config.getThreadNum();
        // create a fixed-size upload thread pool
        ExecutorService threadPool = Executors.newFixedThreadPool(threadNum);
        // initialize the FTP client pool
        FTPClientPool ftpPool = new FTPClientPool(threadNum, new FtpClientFactory(config.getFTPClientConfig()));
        FTPClient ftpClient = ftpPool.borrowObject();
        // create the destination directory on the FTP server
        ftpClient.makeDirectory(dstPath);
        ftpPool.returnObject(ftpClient);
        // list the source files
        FileStatus contents[] = srcFS.listStatus(src);
        long beginFilter = 0;
        long endFileter = 0;

        if (config.getCommandLine().hasOption("d") || config.getCommandLine().hasOption("h")
                || config.getCommandLine().hasOption("t")) { // filter by time range
            beginFilter = System.currentTimeMillis();
            Long[] timeRange = parseTimeRange(config.getCommandLine());
            contents = getNewContents(timeRange, contents);
            endFileter = System.currentTimeMillis();
        }
        // filter by file name
        if (config.getCommandLine().hasOption("r")) { // regular expression filter
            beginFilter = System.currentTimeMillis();
            contents = getFilterContents(config.getCommandLine().getOptionValue("r").trim(), contents);
            endFileter = System.currentTimeMillis();
        }
        logger.info("total file count:" + contents.length);
        Map<String, String> fileNameMap = null;
        long beginSkip = 0;
        long endSkip = 0;
        boolean overwrite = true;
        if (config.getCommandLine().hasOption("o")) {
            overwrite = "true".equals(config.getCommandLine().getOptionValue("o").trim());
        }
        if (!overwrite) { // collect names of files already on the FTP server so they can be skipped
            beginSkip = System.currentTimeMillis();
            fileNameMap = getFileNameMap(dstPath, ftpPool);
            endSkip = System.currentTimeMillis();
        }
        int skiped = 0;

        List<Future<?>> futureList = new ArrayList<Future<?>>();
        for (int i = 0; i < contents.length; i++) {
            if (!overwrite && fileNameMap.containsKey(contents[i].getPath().getName())) {
                // skip files that already exist on the FTP server
                skiped++;
                Log.info("skipped filename:" + contents[i].getPath().getName());
                continue;
            }
            if (contents[i].isDirectory()) {
                continue;
            }
            // submit an upload task for each file
            Future<?> future = threadPool.submit(new UploadFileTask(srcFS, contents[i].getPath(),
                    new Path(dstPath, contents[i].getPath().getName()), ftpPool, false, isRename, subDir,
                    retryTimes));
            futureList.add(future);
        }
        int transfered = 0;
        int failed = 0;
        for (Future<?> future : futureList) {
            Boolean computeResult = (Boolean) future.get();
            if (computeResult) {
                transfered++;
                if (transfered % 50 == 0 || transfered == contents.length) {
                    logger.info("have transferred:" + transfered + " files");
                }
            } else {
                failed++;
                logger.error("failed transfer:" + failed + " files");
            }
        }
        // shut down the thread pool
        threadPool.shutdown();
        // close the FTP client pool
        ftpPool.close();
        // report statistics
        logger.info("filter time:" + (endFileter - beginFilter) + " ms");
        if (!overwrite) {
            logger.info("skip time:" + (endSkip - beginSkip) + " ms");
        }
        logger.info("total file count:" + contents.length);
        logger.info("total transferred: " + transfered + ",total failed:" + failed + ",total skipped:" + skiped);

    } else { // source is a single file: each line lists a path to upload

        BufferedReader reader = null;
        FtpClientFactory facotry = new FtpClientFactory(config.getFTPClientConfig());
        FTPClient ftpClient = null;
        InputStream in = null;
        try {
            Path path = fileStatus.getPath();
            if (!path.getName().contains("log")) {

            }
            reader = new BufferedReader(new FileReader(new File(path.toUri().getPath())));
            String str = null;

            ftpClient = facotry.makeObject();

            while ((str = reader.readLine()) != null) {
                String[] fields = str.split("&");
                Path filePath = null;
                if (fields.length == 2 && !fields[1].isEmpty()) {
                    filePath = new Path(fields[1]);
                    in = srcFS.open(filePath);
                    boolean result = ftpClient.storeFile(dstPath, in);
                    System.out.println(ftpClient.getReplyCode());
                    if (result) {
                        logger.info(filePath.toString());
                    } else {
                        logger_failed.info(filePath.toString());
                    }
                } else {
                    continue;
                }

            }
        } catch (Exception e) {
            e.printStackTrace();

        } finally {
            if (in != null) {
                in.close();
            }
            if (reader != null) {
                reader.close();
            }
            facotry.destroyObject(ftpClient);
        }

    }
    long end = System.currentTimeMillis();
    logger.info("finished transfer,total time:" + (end - start) / 1000 + "s");
    return true;
}

From source file:com.ibm.bi.dml.parser.DataExpression.java

License:Open Source License

/**
 * Read the metadata (MTD) file for the given file name and return its contents as a JSON object.
 * 
 * @param filename
 * @param conditional
 * @return the parsed metadata, or null if no metadata file exists
 * @throws LanguageException
 */
public JSONObject readMetadataFile(String filename, boolean conditional) throws LanguageException {
    JSONObject retVal = null;
    boolean exists = false;
    FileSystem fs = null;

    try {
        fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
    } catch (Exception e) {
        raiseValidateError("could not read the configuration file: " + e.getMessage(), false);
    }

    Path pt = new Path(filename);
    try {
        if (fs.exists(pt)) {
            exists = true;
        }
    } catch (Exception e) {
        exists = false;
    }

    boolean isDirBoolean = false;
    try {
        if (exists && fs.getFileStatus(pt).isDirectory())
            isDirBoolean = true;
        else
            isDirBoolean = false;
    } catch (Exception e) {
        raiseValidateError(
                "error validating whether path " + pt.toString() + " is a directory or not: " + e.getMessage(),
                conditional);
    }

    // CASE: filename is a directory -- process as a directory
    if (exists && isDirBoolean) {

        // read directory contents
        retVal = new JSONObject();

        FileStatus[] stats = null;

        try {
            stats = fs.listStatus(pt);
        } catch (Exception e) {
            raiseValidateError("for MTD file in directory, error reading directory with MTD file "
                    + pt.toString() + ": " + e.getMessage(), conditional);
        }

        for (FileStatus stat : stats) {
            Path childPath = stat.getPath(); // gives directory name
            if (childPath.getName().startsWith("part")) {

                BufferedReader br = null;
                try {
                    br = new BufferedReader(new InputStreamReader(fs.open(childPath)));
                } catch (Exception e) {
                    raiseValidateError("for MTD file in directory, error reading part of MTD file with path "
                            + childPath.toString() + ": " + e.getMessage(), conditional);
                }

                JSONObject childObj = null;
                try {
                    childObj = JSONHelper.parse(br);
                } catch (Exception e) {
                    raiseValidateError("for MTD file in directory, error parsing part of MTD file with path "
                            + childPath.toString() + ": " + e.getMessage(), conditional);
                }

                for (Object obj : childObj.entrySet()) {
                    @SuppressWarnings("unchecked")
                    Entry<Object, Object> e = (Entry<Object, Object>) obj;
                    Object key = e.getKey();
                    Object val = e.getValue();
                    retVal.put(key, val);
                }
            }
        } // end for 
    }

    // CASE: filename points to a file
    else if (exists) {

        BufferedReader br = null;

        // try reading MTD file
        try {
            br = new BufferedReader(new InputStreamReader(fs.open(pt)));
        } catch (Exception e) {
            raiseValidateError("error reading MTD file with path " + pt.toString() + ": " + e.getMessage(),
                    conditional);
        }

        // try parsing MTD file
        try {
            retVal = JSONHelper.parse(br);
        } catch (Exception e) {
            raiseValidateError("error parsing MTD file with path " + pt.toString() + ": " + e.getMessage(),
                    conditional);
        }
    }

    return retVal;
}

From source file:com.ibm.bi.dml.parser.DataExpression.java

License:Open Source License

public String[] readMatrixMarketFile(String filename, boolean conditional) throws LanguageException {
    String[] retVal = new String[2];
    retVal[0] = "";
    retVal[1] = "";
    boolean exists = false;

    try {
        FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
        Path pt = new Path(filename);
        if (fs.exists(pt)) {
            exists = true;
        }

        boolean getFileStatusIsDir = fs.getFileStatus(pt).isDirectory();

        if (exists && getFileStatusIsDir) {
            raiseValidateError("MatrixMarket files as directories not supported", conditional);
        } else if (exists) {
            BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(pt)));
            try {
                retVal[0] = in.readLine();
                // skip all commented lines
                do {
                    retVal[1] = in.readLine();
                } while (retVal[1].charAt(0) == '%');

                if (!retVal[0].startsWith("%%")) {
                    raiseValidateError("MatrixMarket files must begin with a header line.", conditional);
                }
            } finally {
                if (in != null)
                    in.close();
            }
        } else {
            raiseValidateError("Could not find the file: " + filename, conditional);
        }

    } catch (IOException e) {
        //LOG.error(this.printErrorLocation() + "Error reading MatrixMarket file: " + filename );
        //throw new LanguageException(this.printErrorLocation() + "Error reading MatrixMarket file: " + filename );
        throw new LanguageException(e);
    }

    return retVal;
}

From source file:com.ibm.bi.dml.parser.DataExpression.java

License:Open Source License

public boolean checkHasMatrixMarketFormat(String inputFileName, String mtdFileName, boolean conditional)
        throws LanguageException {
    // Check the MTD file exists. if there is an MTD file, return false.
    JSONObject mtdObject = readMetadataFile(mtdFileName, conditional);

    if (mtdObject != null)
        return false;

    boolean exists = false;
    FileSystem fs = null;

    try {
        fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
    } catch (Exception e) {
        LOG.error(this.printErrorLocation() + "could not read the configuration file.");
        throw new LanguageException(this.printErrorLocation() + "could not read the configuration file.", e);
    }

    Path pt = new Path(inputFileName);
    try {
        if (fs.exists(pt)) {
            exists = true;
        }
    } catch (Exception e) {
        LOG.error(this.printErrorLocation() + "file " + inputFileName + " not found");
        throw new LanguageException(this.printErrorLocation() + "file " + inputFileName + " not found");
    }

    try {
        // CASE: filename is a directory -- process as a directory
        if (exists && fs.getFileStatus(pt).isDirectory()) {

            // currently, only MM files as files are supported.  So, if file is directory, then infer 
            // likely not MM file
            return false;
        }
        // CASE: filename points to a file
        else if (exists) {

            //BufferedReader in = new BufferedReader(new FileReader(filename));
            BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(pt)));

            String headerLine = "";
            if (in.ready())
                headerLine = in.readLine();
            in.close();

            // check that headerline starts with "%%"
            // will infer malformed 
            if (headerLine != null && headerLine.startsWith("%%"))
                return true;
            else
                return false;
        } else {
            return false;
        }

    } catch (Exception e) {
        return false;
    }
}

From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.RemoteParForColocatedFileSplit.java

License:Open Source License

/**
 * Get the list of hostnames where the input split is located.
 */
@Override
public String[] getLocations() throws IOException {
    //Timing time = new Timing();
    //time.start();

    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);

    //read task string
    LongWritable key = new LongWritable();
    Text value = new Text();
    RecordReader<LongWritable, Text> reader = new NLineInputFormat().getRecordReader(this, job, Reporter.NULL);
    reader.next(key, value);
    reader.close();

    //parse task
    Task t = Task.parseCompactString(value.toString());

    //get all locations
    HashMap<String, Integer> hosts = new HashMap<String, Integer>();

    if (t.getType() == TaskType.SET) {
        for (IntObject val : t.getIterations()) {
            String fname = _fname + "/" + String.valueOf(((val.getLongValue() - 1) / _blen + 1));
            FileStatus status = fs.getFileStatus(new Path(fname));
            BlockLocation[] tmp1 = fs.getFileBlockLocations(status, 0, status.getLen());
            for (BlockLocation bl : tmp1)
                countHosts(hosts, bl.getHosts());
        }
    } else //TaskType.RANGE
    {
        //since this is a serial process, we use just the first iteration
        //as a heuristic for location information
        long lFrom = t.getIterations().get(0).getLongValue();
        long lTo = t.getIterations().get(1).getLongValue();
        for (long li : new long[] { lFrom, lTo }) {
            String fname = _fname + "/" + String.valueOf(((li - 1) / _blen + 1));
            FileStatus status = fs.getFileStatus(new Path(fname));
            BlockLocation[] tmp1 = fs.getFileBlockLocations(status, 0, status.getLen());
            for (BlockLocation bl : tmp1)
                countHosts(hosts, bl.getHosts());
        }

        /*
        int lFrom  = t.getIterations().get(0).getIntValue();
        int lTo    = t.getIterations().get(1).getIntValue();
        int lIncr  = t.getIterations().get(2).getIntValue();            
        for( int i=lFrom; i<=lTo; i+=lIncr )
        {
           String fname = _fname+"/"+String.valueOf( ((i-_offset)/_blen+_offset) );
           FileSystem fs = FileSystem.get(job);
           FileStatus status = fs.getFileStatus(new Path(fname)); 
           BlockLocation[] tmp1 = fs.getFileBlockLocations(status, 0, status.getLen());
           for( BlockLocation bl : tmp1 )
              countHosts(hosts, bl.getHosts());
        }*/
    }

    //System.out.println("Get locations "+time.stop()+"");

    //majority consensus on top host
    return getTopHosts(hosts);
}

From source file:com.ibm.bi.dml.runtime.io.WriterMatrixMarket.java

License:Open Source License

/**
 * Merge a text cell file (or a directory of text cell files) into a single MatrixMarket file.
 * 
 * @param srcFileName source file or directory in text cell format
 * @param fileName name of the merged MatrixMarket output file
 * @param rlen number of rows
 * @param clen number of columns
 * @param nnz number of non-zero values
 * @throws IOException
 */
public void mergeTextcellToMatrixMarket(String srcFileName, String fileName, long rlen, long clen, long nnz)
        throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path src = new Path(srcFileName);
    Path merge = new Path(fileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (hdfs.exists(merge)) {
        hdfs.delete(merge, true);
    }

    OutputStream out = hdfs.create(merge, true);

    // write out the header first 
    StringBuilder sb = new StringBuilder();
    sb.append("%%MatrixMarket matrix coordinate real general\n");

    // output number of rows, number of columns and number of nnz
    sb.append(rlen + " " + clen + " " + nnz + "\n");
    out.write(sb.toString().getBytes());

    // if the source is a directory
    if (hdfs.getFileStatus(src).isDirectory()) {
        try {
            FileStatus[] contents = hdfs.listStatus(src);
            for (int i = 0; i < contents.length; i++) {
                if (!contents[i].isDirectory()) {
                    InputStream in = hdfs.open(contents[i].getPath());
                    try {
                        IOUtils.copyBytes(in, out, conf, false);
                    } finally {
                        IOUtilFunctions.closeSilently(in);
                    }
                }
            }
        } finally {
            IOUtilFunctions.closeSilently(out);
        }
    } else if (hdfs.isFile(src)) {
        InputStream in = null;
        try {
            in = hdfs.open(src);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(src.toString() + ": No such file or directory");
    }
}

From source file:com.ibm.bi.dml.runtime.matrix.data.MultipleOutputCommitter.java

License:Open Source License

/**
 * Move all files in the given task output directory to their final destination.
 * 
 * @param context
 * @param fs
 * @param taskOutput
 * @throws IOException
 */
private void moveFinalTaskOutputs(TaskAttemptContext context, FileSystem fs, Path taskOutput)
        throws IOException {
    context.getProgressible().progress();

    if (fs.getFileStatus(taskOutput).isDirectory()) {
        FileStatus[] files = fs.listStatus(taskOutput);
        if (files != null)
            for (FileStatus file : files) //for all files
                if (!file.isDirectory()) //skip directories
                    moveFileToDestination(context, fs, file.getPath());
    }
}