org.apache.sysml.runtime.util.MapReduceTool.java Source code

Introduction

Here is the source code for org.apache.sysml.runtime.util.MapReduceTool.java, a collection of static utility methods used throughout the SystemML runtime for working with HDFS and other Hadoop file systems: checking, deleting, renaming, and merging files, reading and writing scalar values, writing metadata (.mtd) JSON files, and picking quantiles from sorted, partitioned data.
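
Before diving into the listing, here is a minimal usage sketch (not part of the SystemML sources) showing how a few of the static helpers below might be called. The class name and file path are hypothetical, and it assumes the SystemML and Hadoop libraries are on the classpath with a reachable file system:

import org.apache.sysml.runtime.util.MapReduceTool;

public class MapReduceToolExample {
    public static void main(String[] args) throws Exception {
        // hypothetical path; any path resolvable by the configured Hadoop FileSystem works
        String fname = "/tmp/systemml/example.txt";

        // write a scalar string and read it back
        MapReduceTool.writeStringToHDFS("hello", fname);
        if (MapReduceTool.existsFileOnHDFS(fname)) {
            System.out.println(MapReduceTool.readStringFromHDFSFile(fname));
        }

        // remove the file together with its optional .mtd metadata file
        MapReduceTool.deleteFileWithMTDIfExistOnHDFS(fname);
    }
}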

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.runtime.util;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.parser.DataExpression;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.io.IOUtilFunctions;
import org.apache.sysml.runtime.io.MatrixReader;
import org.apache.sysml.runtime.io.MatrixReaderFactory;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.MetaDataNumItemsByEachReducer;
import org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties;
import org.apache.sysml.runtime.matrix.data.FileFormatProperties;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.OutputInfo;
import org.apache.sysml.runtime.matrix.mapred.MRConfigurationNames;
import org.apache.sysml.runtime.matrix.sort.ReadWithZeros;
import org.apache.wink.json4j.JSONException;
import org.apache.wink.json4j.OrderedJSONObject;

public class MapReduceTool {
    private static final int MAX_DELETE_RETRIES = 10;

    private static final Log LOG = LogFactory.getLog(MapReduceTool.class.getName());

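    /**
     * Derives a per-task key from the MR task attempt id by extracting the
     * task number that follows "_m_" (mapper) or "_r_" (reducer) and
     * stripping its leading zeros.
     */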
    public static String getUniqueKeyPerTask(JobConf job, boolean inMapper) {
        //TODO: investigate ID pattern, required for parallel jobs
        /*String nodePrefix = job.get(MRConfigurationNames.MR_TASK_ATTEMPT_ID);
        return String.valueOf(IDHandler.extractLongID(nodePrefix));*/

        String nodePrefix = job.get(MRConfigurationNames.MR_TASK_ATTEMPT_ID);
        int i;
        if (inMapper)
            i = nodePrefix.indexOf("_m_");
        else
            i = nodePrefix.indexOf("_r_");
        int j = nodePrefix.lastIndexOf("_");
        nodePrefix = nodePrefix.substring(i + 3, j);
        // remove all the leading 0s
        return String.valueOf(Long.parseLong(nodePrefix));
    }

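    /**
     * Extracts the numeric task id, i.e., the second-to-last
     * underscore-delimited token of the MR task attempt id.
     */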
    public static int getUniqueTaskId(JobConf job) {
        //TODO: investigate ID pattern, required for parallel jobs
        /*String nodePrefix = job.get(MRConfigurationNames.MR_TASK_ATTEMPT_ID); 
        return IDHandler.extractIntID(nodePrefix);*/

        String nodePrefix = job.get(MRConfigurationNames.MR_TASK_ATTEMPT_ID);
        int j = nodePrefix.lastIndexOf("_");
        int i = nodePrefix.lastIndexOf("_", j - 1);
        nodePrefix = nodePrefix.substring(i + 1, j);
        // System.out.println("nodePrefix = " + nodePrefix) ;
        return Integer.valueOf(nodePrefix);
    }

    public static String getGloballyUniqueName(JobConf job) {
        return job.get(MRConfigurationNames.MR_TASK_ATTEMPT_ID);
    }

    public static boolean existsFileOnHDFS(String fname) {
        //robustness for empty strings (e.g., JMLC, MLContext)
        if (fname == null || fname.isEmpty() || fname.trim().isEmpty())
            return false;

        try {
            Path path = new Path(fname);
            return IOUtilFunctions.getFileSystem(path).exists(path);
        } catch (Exception ex) {
            LOG.error("Failed check existsFileOnHDFS.", ex);
        }
        return false;
    }

    public static boolean isDirectory(String fname) {
        //robustness for empty strings (e.g., JMLC, MLContext)
        if (fname == null || fname.isEmpty() || fname.trim().isEmpty())
            return false;

        try {
            Path path = new Path(fname);
            return IOUtilFunctions.getFileSystem(path).isDirectory(path);
        } catch (Exception ex) {
            LOG.error("Failed check isDirectory.", ex);
        }
        return false;
    }

    public static FileStatus[] getDirectoryListing(String fname) {
        try {
            Path path = new Path(fname);
            return IOUtilFunctions.getFileSystem(path).listStatus(path);
        } catch (Exception ex) {
            LOG.error("Failed listing of directory contents.", ex);
        }
        return new FileStatus[0];
    }

    public static void deleteFileWithMTDIfExistOnHDFS(String fname) throws IOException {
        deleteFileIfExistOnHDFS(fname);
        deleteFileIfExistOnHDFS(fname + ".mtd");
    }

    public static void deleteFileIfExistOnHDFS(String dir) throws IOException {
        Path path = new Path(dir);
        deleteFileIfExists(IOUtilFunctions.getFileSystem(path), path);
    }

    public static void deleteFileIfExistOnHDFS(Path outpath, JobConf job) throws IOException {
        deleteFileIfExists(IOUtilFunctions.getFileSystem(outpath, job), outpath);
    }

    public static void deleteFileIfExistOnLFS(Path outpath, JobConf job) throws IOException {
        deleteFileIfExists(FileSystem.getLocal(job), outpath);
    }

    private static void deleteFileIfExists(FileSystem fs, Path outpath) throws IOException {
        if (fs.exists(outpath)) {
            int retries = MAX_DELETE_RETRIES;
            while (!fs.delete(outpath, true) && retries > 0) {
                retries--;
            }
        }
    }

    public static boolean isHDFSFileEmpty(String dir) throws IOException {
        //robustness for empty strings (e.g., JMLC, MLContext)
        if (dir == null || dir.isEmpty() || dir.trim().isEmpty())
            return false;
        Path path = new Path(dir);
        FileSystem fs = IOUtilFunctions.getFileSystem(path);
        return isFileEmpty(fs, path);
    }

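    /**
     * Returns true if the given path is a zero-length file, or a directory
     * (or object-store path) whose listed entries are all zero-length.
     */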
    public static boolean isFileEmpty(FileSystem fs, Path dir) throws IOException {
        FileStatus fstat = fs.getFileStatus(dir);

        if (fstat.isDirectory() || IOUtilFunctions.isObjectStoreFileScheme(dir)) {
            // it is a directory
            FileStatus[] stats = fs.listStatus(dir);
            if (stats != null) {
                for (FileStatus stat : stats) {
                    if (stat.getLen() > 0)
                        return false;
                }
                return true;
            } else {
                return true;
            }
        } else {
            // it is a regular file
            return (fstat.getLen() == 0);
        }
    }

    public static void renameFileOnHDFS(String originalDir, String newDir) throws IOException {
        Path pathOrig = new Path(originalDir);
        Path pathNew = new Path(newDir);
        if (!IOUtilFunctions.isSameFileScheme(pathOrig, pathNew))
            throw new IOException("Cannot rename files to different target file system.");

        deleteFileIfExistOnHDFS(newDir);
        FileSystem fs = IOUtilFunctions.getFileSystem(pathOrig);
        if (fs.exists(pathOrig))
            fs.rename(pathOrig, pathNew);
        else
            throw new FileNotFoundException(originalDir);
    }

    public static void mergeIntoSingleFile(String originalDir, String newFile) throws IOException {
        Path pathOrig = new Path(originalDir);
        Path pathNew = new Path(newFile);
        if (!IOUtilFunctions.isSameFileScheme(pathOrig, pathNew))
            throw new IOException("Cannot merge files into different target file system.");
        FileSystem fs = IOUtilFunctions.getFileSystem(pathOrig);
        FileUtil.copyMerge(fs, pathOrig, fs, pathNew, true, ConfigurationManager.getCachedJobConf(), null);
    }

    public static void copyFileOnHDFS(String originalDir, String newDir) throws IOException {
        Path originalPath = new Path(originalDir);
        Path newPath = new Path(newDir);
        boolean deleteSource = false;
        boolean overwrite = true;

        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        FileSystem fs = IOUtilFunctions.getFileSystem(originalPath, job);
        if (fs.exists(originalPath)) {
            FileUtil.copy(fs, originalPath, fs, newPath, deleteSource, overwrite, job);
        }
    }

    /**
     * Returns the size of a file or directory on hdfs in bytes.
     * 
     * @param path file system path
     * @return file size
     * @throws IOException if IOException occurs
     */
    public static long getFilesizeOnHDFS(Path path) throws IOException {
        FileSystem fs = IOUtilFunctions.getFileSystem(path);
        long ret = 0; //in bytes
        if (fs.isDirectory(path))
            ret = fs.getContentSummary(path).getLength();
        else
            ret = fs.getFileStatus(path).getLen();
        //note: filestatus would return 0 on directories

        return ret;
    }

    private static BufferedReader setupInputFile(String filename) throws IOException {
        Path path = new Path(filename);
        FileSystem fs = IOUtilFunctions.getFileSystem(path);
        BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
        return br;
    }

    public static double readDoubleFromHDFSFile(String filename) throws IOException {
        return (Double) readObjectFromHDFSFile(filename, ValueType.DOUBLE);
    }

    public static long readIntegerFromHDFSFile(String filename) throws IOException {
        return (Long) readObjectFromHDFSFile(filename, ValueType.INT);
    }

    public static boolean readBooleanFromHDFSFile(String filename) throws IOException {
        return (Boolean) readObjectFromHDFSFile(filename, ValueType.BOOLEAN);
    }

    public static String readStringFromHDFSFile(String filename) throws IOException {
        StringBuilder sb = new StringBuilder();
        try (BufferedReader br = setupInputFile(filename)) {
            // handle multi-line strings in the HDFS file
            String line = null;
            while ((line = br.readLine()) != null) {
                sb.append(line);
                sb.append("\n");
            }
        }

        //return string without the trailing newline
        return sb.substring(0, sb.length() - 1);
    }

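    /**
     * Reads the first line of the given HDFS file and parses it according
     * to the requested value type (boolean, long, double, or string).
     */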
    public static Object readObjectFromHDFSFile(String filename, ValueType vt) throws IOException {
        String line = null;
        try (BufferedReader br = setupInputFile(filename)) {
            line = br.readLine();
        }
        if (line == null)
            throw new IOException("Empty file on hdfs: " + filename);

        switch (vt) {
        case BOOLEAN:
            return Boolean.parseBoolean(line);
        case DOUBLE:
            return Double.parseDouble(line);
        case INT:
            return Long.parseLong(line);
        default:
            return line;
        }
    }

    private static BufferedWriter setupOutputFile(String filename) throws IOException {
        Path path = new Path(filename);
        FileSystem fs = IOUtilFunctions.getFileSystem(path);
        BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));
        return br;
    }

    public static void writeDoubleToHDFS(double d, String filename) throws IOException {
        writeObjectToHDFS(d, filename);
    }

    public static void writeIntToHDFS(long i, String filename) throws IOException {
        writeObjectToHDFS(i, filename);
    }

    public static void writeBooleanToHDFS(boolean b, String filename) throws IOException {
        writeObjectToHDFS(b, filename);
    }

    public static void writeStringToHDFS(String s, String filename) throws IOException {
        writeObjectToHDFS(s, filename);
    }

    public static void writeObjectToHDFS(Object obj, String filename) throws IOException {
        try (BufferedWriter br = setupOutputFile(filename)) {
            br.write(obj.toString());
        }
    }

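    /**
     * Writes one line per result with unknown dimensions, in the format
     * "index maxRows maxCols", for later reconciliation by processDimsFiles.
     */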
    public static void writeDimsFile(String filename, byte[] unknownFlags, long[] maxRows, long[] maxCols)
            throws IOException {
        try (BufferedWriter br = setupOutputFile(filename)) {
            StringBuilder line = new StringBuilder();
            for (int i = 0; i < unknownFlags.length; i++) {
                if (unknownFlags[i] != (byte) 0) {
                    line.append(i);
                    line.append(" " + maxRows[i]);
                    line.append(" " + maxCols[i]);
                    line.append("\n");
                }
            }
            br.write(line.toString());
        }
    }

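    /**
     * Reads all dims files in the given directory (as written by
     * writeDimsFile) and enlarges the corresponding matrix characteristics
     * to the maximum observed row and column counts.
     */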
    public static MatrixCharacteristics[] processDimsFiles(String dir, MatrixCharacteristics[] stats)
            throws IOException {
        Path path = new Path(dir);
        FileSystem fs = IOUtilFunctions.getFileSystem(path);

        if (!fs.exists(path))
            return stats;

        FileStatus fstat = fs.getFileStatus(path);

        if (fstat.isDirectory()) {
            FileStatus[] files = fs.listStatus(path);
            for (int i = 0; i < files.length; i++) {
                Path filePath = files[i].getPath();
                try (BufferedReader br = setupInputFile(filePath.toString())) {
                    String line = "";
                    while ((line = br.readLine()) != null) {
                        String[] parts = line.split(" ");
                        int resultIndex = Integer.parseInt(parts[0]);
                        long maxRows = Long.parseLong(parts[1]);
                        long maxCols = Long.parseLong(parts[2]);

                        stats[resultIndex].setDimension(
                                (stats[resultIndex].getRows() < maxRows ? maxRows : stats[resultIndex].getRows()),
                                (stats[resultIndex].getCols() < maxCols ? maxCols : stats[resultIndex].getCols()));
                    }
                }
            }
        } else {
            throw new IOException(dir + " is expected to be a folder!");
        }

        return stats;
    }

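    /**
     * Writes a metadata (.mtd) JSON file describing the given matrix
     * characteristics and output format.
     */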
    public static void writeMetaDataFile(String mtdfile, ValueType vt, MatrixCharacteristics mc, OutputInfo outinfo)
            throws IOException {
        writeMetaDataFile(mtdfile, vt, null, DataType.MATRIX, mc, outinfo);
    }

    public static void writeMetaDataFile(String mtdfile, ValueType vt, ValueType[] schema, DataType dt,
            MatrixCharacteristics mc, OutputInfo outinfo) throws IOException {
        writeMetaDataFile(mtdfile, vt, schema, dt, mc, outinfo, null);
    }

    public static void writeMetaDataFile(String mtdfile, ValueType vt, MatrixCharacteristics mc, OutputInfo outinfo,
            FileFormatProperties formatProperties) throws IOException {
        writeMetaDataFile(mtdfile, vt, null, DataType.MATRIX, mc, outinfo, formatProperties);
    }

    public static void writeMetaDataFile(String mtdfile, ValueType vt, ValueType[] schema, DataType dt,
            MatrixCharacteristics mc, OutputInfo outinfo, FileFormatProperties formatProperties)
            throws IOException {
        Path path = new Path(mtdfile);
        FileSystem fs = IOUtilFunctions.getFileSystem(path);
        try (BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)))) {
            String mtd = metaDataToString(vt, schema, dt, mc, outinfo, formatProperties);
            br.write(mtd);
        } catch (Exception e) {
            throw new IOException("Error creating and writing metadata JSON file", e);
        }
    }

    public static void writeScalarMetaDataFile(String mtdfile, ValueType vt) throws IOException {
        Path path = new Path(mtdfile);
        FileSystem fs = IOUtilFunctions.getFileSystem(path);
        try (BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)))) {
            String mtd = metaDataToString(vt, null, DataType.SCALAR, null, OutputInfo.TextCellOutputInfo, null);
            br.write(mtd);
        } catch (Exception e) {
            throw new IOException("Error creating and writing metadata JSON file", e);
        }
    }

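    /**
     * Serializes the given metadata (data type, value type or frame schema,
     * dimensions, nnz, format, optional CSV or description properties, plus
     * author and creation date) into the JSON string used for .mtd files.
     */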
    public static String metaDataToString(ValueType vt, ValueType[] schema, DataType dt, MatrixCharacteristics mc,
            OutputInfo outinfo, FileFormatProperties formatProperties) throws JSONException, DMLRuntimeException {
        OrderedJSONObject mtd = new OrderedJSONObject(); // maintain order in output file

        //handle data type and value types (incl schema for frames)
        mtd.put(DataExpression.DATATYPEPARAM, dt.toString().toLowerCase());
        if (schema == null) {
            mtd.put(DataExpression.VALUETYPEPARAM, vt.toString().toLowerCase());
        } else {
            StringBuffer schemaSB = new StringBuffer();
            for (int i = 0; i < schema.length; i++) {
                if (schema[i] == ValueType.UNKNOWN)
                    schemaSB.append("*");
                else
                    schemaSB.append(schema[i].toString());
                schemaSB.append(DataExpression.DEFAULT_DELIM_DELIMITER);
            }
            mtd.put(DataExpression.SCHEMAPARAM, schemaSB.toString());
        }

        //handle output dimensions
        if (!dt.isScalar()) {
            mtd.put(DataExpression.READROWPARAM, mc.getRows());
            mtd.put(DataExpression.READCOLPARAM, mc.getCols());
            // handle output nnz and binary block configuration
            if (dt.isMatrix()) {
                if (outinfo == OutputInfo.BinaryBlockOutputInfo) {
                    mtd.put(DataExpression.ROWBLOCKCOUNTPARAM, mc.getRowsPerBlock());
                    mtd.put(DataExpression.COLUMNBLOCKCOUNTPARAM, mc.getColsPerBlock());
                }
                mtd.put(DataExpression.READNUMNONZEROPARAM, mc.getNonZeros());
            }
        }

        //handle format type and additional arguments   
        mtd.put(DataExpression.FORMAT_TYPE, OutputInfo.outputInfoToStringExternal(outinfo));
        if (outinfo == OutputInfo.CSVOutputInfo) {
            CSVFileFormatProperties csvProperties = (formatProperties == null) ? new CSVFileFormatProperties()
                    : (CSVFileFormatProperties) formatProperties;
            mtd.put(DataExpression.DELIM_HAS_HEADER_ROW, csvProperties.hasHeader());
            mtd.put(DataExpression.DELIM_DELIMITER, csvProperties.getDelim());
        }

        if (formatProperties != null) {
            String description = formatProperties.getDescription();
            if (StringUtils.isNotEmpty(description)) {
                String jsonDescription = StringEscapeUtils.escapeJson(description);
                mtd.put(DataExpression.DESCRIPTIONPARAM, jsonDescription);
            }
        }

        String userName = System.getProperty("user.name");
        if (StringUtils.isNotEmpty(userName)) {
            mtd.put(DataExpression.AUTHORPARAM, userName);
        } else {
            mtd.put(DataExpression.AUTHORPARAM, "SystemML");
        }

        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss z");
        mtd.put(DataExpression.CREATEDPARAM, sdf.format(new Date()));

        return mtd.toString(4); // indent with 4 spaces   
    }

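    /**
     * Reads an entire matrix from HDFS into a dense double[][], using the
     * matrix reader that matches the given input format.
     */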
    public static double[][] readMatrixFromHDFS(String dir, InputInfo inputinfo, long rlen, long clen, int brlen,
            int bclen) throws IOException, DMLRuntimeException {
        MatrixReader reader = MatrixReaderFactory.createMatrixReader(inputinfo);
        long estnnz = (rlen <= 0 || clen <= 0) ? -1 : rlen * clen;
        MatrixBlock mb = reader.readMatrixFromHDFS(dir, rlen, clen, brlen, bclen, estnnz);
        return DataConverter.convertToDoubleMatrix(mb);
    }

    public static double[] readColumnVectorFromHDFS(String dir, InputInfo inputinfo, long rlen, long clen,
            int brlen, int bclen) throws IOException, DMLRuntimeException {
        MatrixReader reader = MatrixReaderFactory.createMatrixReader(inputinfo);
        long estnnz = (rlen <= 0 || clen <= 0) ? -1 : rlen * clen;
        MatrixBlock mb = reader.readMatrixFromHDFS(dir, rlen, clen, brlen, bclen, estnnz);
        return DataConverter.convertToDoubleVector(mb, false);
    }

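    /**
     * Computes the median (0.5 quantile) over the sorted, partitioned data
     * described by the given metadata, averaging the two middle values if
     * the total number of items is even.
     */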
    public static double median(String dir, MetaDataNumItemsByEachReducer metadata) throws IOException {
        long[] counts = metadata.getNumItemsArray();
        long[] ranges = new long[counts.length];
        ranges[0] = counts[0];
        for (int i = 1; i < counts.length; i++)
            ranges[i] = ranges[i - 1] + counts[i];

        long total = ranges[ranges.length - 1];

        return pickValueWeight(dir, metadata, 0.5, total % 2 == 0)[0];
    }

    public static double pickValue(String dir, MetaDataNumItemsByEachReducer metadata, double p)
            throws IOException {
        return pickValueWeight(dir, metadata, p, false)[0];
    }

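    /**
     * Picks the value at quantile p from the sorted partition files in dir,
     * using the per-reducer item counts to locate the target partition and
     * offset, and optionally averaging with the next value. Returns
     * {value, count, cumulative weight}, with count and weight set to -1
     * when averaging.
     */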
    public static double[] pickValueWeight(String dir, MetaDataNumItemsByEachReducer metadata, double p,
            boolean average) throws IOException {
        long[] counts = metadata.getNumItemsArray();
        long[] ranges = new long[counts.length];
        ranges[0] = counts[0];
        for (int i = 1; i < counts.length; i++)
            ranges[i] = ranges[i - 1] + counts[i];

        long total = ranges[ranges.length - 1];

        // do averaging only if requested and the total number of items is even
        average = average && (total % 2 == 0);

        int currentPart = 0;
        double cum_weight = 0;
        long pos = (long) Math.ceil(total * p);
        while (ranges[currentPart] < pos) {
            currentPart++;
            cum_weight += ranges[currentPart];
        }
        int offset;
        if (currentPart > 0)
            offset = (int) (pos - ranges[currentPart - 1] - 1);
        else
            offset = (int) pos - 1;

        Path path = new Path(dir);
        FileSystem fs = IOUtilFunctions.getFileSystem(path);
        FileStatus[] files = fs.listStatus(path);
        Path fileToRead = null;
        for (FileStatus file : files)
            if (file.getPath().toString().endsWith(Integer.toString(currentPart))) {
                fileToRead = file.getPath();
                break;
            }

        if (fileToRead == null)
            throw new RuntimeException("cannot read partition " + currentPart);

        int buffsz = 64 * 1024;
        DoubleWritable readKey = new DoubleWritable();
        IntWritable readValue = new IntWritable();
        FSDataInputStream currentStream = null;
        double ret = -1;
        try {
            currentStream = fs.open(fileToRead, buffsz);

            boolean contain0s = false;
            long numZeros = 0;
            if (currentPart == metadata.getPartitionOfZero()) {
                contain0s = true;
                numZeros = metadata.getNumberOfZero();
            }
            ReadWithZeros reader = new ReadWithZeros(currentStream, contain0s, numZeros);

            int numRead = 0;
            while (numRead <= offset) {
                reader.readNextKeyValuePairs(readKey, readValue);
                numRead += readValue.get();
                cum_weight += readValue.get();
            }

            ret = readKey.get();
            if (average) {
                if (numRead <= offset + 1) {
                    reader.readNextKeyValuePairs(readKey, readValue);
                    cum_weight += readValue.get();
                    ret = (ret + readKey.get()) / 2;
                }
            }
        } finally {
            IOUtilFunctions.closeSilently(currentStream);
        }
        return new double[] { ret, (average ? -1 : readValue.get()), (average ? -1 : cum_weight) };
    }

    public static void createDirIfNotExistOnHDFS(String dir, String permissions) throws IOException {
        createDirIfNotExistOnHDFS(new Path(dir), permissions);
    }

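    /**
     * Creates the directory if it does not exist, interpreting the
     * permission string as three octal digits (e.g., "700") that are
     * converted into an FsPermission mode.
     */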
    public static void createDirIfNotExistOnHDFS(Path path, String permissions) throws IOException {
        try {
            FileSystem fs = IOUtilFunctions.getFileSystem(path);
            if (!fs.exists(path)) {
                char[] c = permissions.toCharArray();
                short sU = (short) ((c[0] - 48) * 64);
                short sG = (short) ((c[1] - 48) * 8);
                short sO = (short) ((c[2] - 48));
                short mode = (short) (sU + sG + sO);
                FsPermission perm = new FsPermission(mode);
                fs.mkdirs(path, perm);
            }
        } catch (Exception ex) {
            throw new IOException("Failed in creating a non existing dir on HDFS", ex);
        }

        //NOTE: we depend on the configured umask; setting the umask in the job or via FsPermission has no effect.
        //Similarly, setting MRConfigurationNames.DFS_DATANODE_DATA_DIR_PERM has no effect either.
    }

    public static FSDataOutputStream getHDFSDataOutputStream(String filename, boolean overwrite)
            throws IOException {
        Path path = new Path(filename);
        return IOUtilFunctions.getFileSystem(path).create(path, overwrite);
    }
}