Example usage for org.apache.hadoop.fs FileSystem listStatus

List of usage examples for org.apache.hadoop.fs FileSystem listStatus

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileSystem listStatus.

Prototype

public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException 

Source Link

Document

Filter files/directories in the given list of paths using default path filter.

Usage

From source file:com.ibm.bi.dml.runtime.util.MapReduceTool.java

License:Open Source License

/**
 * Locates the entry at relative position {@code p} among the items described by
 * {@code metadata} and reads it from the matching partition file under {@code dir}.
 *
 * <p>The per-partition item counts are turned into running totals and the target position is
 * {@code ceil(total * p)}. The first file listed under {@code dir} whose path ends with the
 * selected partition number is opened and scanned until the accumulated weights pass the
 * offset of the target position inside that partition.
 *
 * @param dir
 *            directory holding the partition files
 * @param metadata
 *            supplies the per-partition item counts and the zero-partition information
 * @param p
 *            relative position; NOTE(review): presumably within [0, 1] - confirm with callers
 * @param average
 *            if {@code true} and the total item count is even, the picked key is averaged with
 *            the following key whenever the scan stopped exactly at the target position
 * @return a three-element array: the picked (or averaged) key, the weight read last, and the
 *         accumulated weight; the last two entries are {@code -1} when averaging is in effect
 * @throws IOException
 *             if listing, opening, reading or closing the partition file fails
 * @throws RuntimeException
 *             if no file under {@code dir} ends with the selected partition number
 */
public static double[] pickValueWeight(String dir, NumItemsByEachReducerMetaData metadata, double p,
        boolean average) throws IOException {
    long[] counts = metadata.getNumItemsArray();
    // ranges[i] is the running total of the item counts of partitions 0..i
    long[] ranges = new long[counts.length];
    ranges[0] = counts[0];
    for (int i = 1; i < counts.length; i++)
        ranges[i] = ranges[i - 1] + counts[i];

    long total = ranges[ranges.length - 1];

    // do averaging only if it is asked for; and sum_wt is even
    average = average && (total % 2 == 0);

    int currentPart = 0;
    double cum_weight = 0;
    long pos = (long) Math.ceil(total * p);
    // advance to the first partition whose running total reaches the target position
    while (ranges[currentPart] < pos) {
        currentPart++;
        // NOTE(review): this adds the running total of the partition just entered, not of the
        // partitions skipped - kept as in the original; confirm this is the intended weight
        cum_weight += ranges[currentPart];
    }
    // zero-based offset of the target position inside the selected partition
    int offset;
    if (currentPart > 0)
        offset = (int) (pos - ranges[currentPart - 1] - 1);
    else
        offset = (int) pos - 1;

    FileSystem fs = FileSystem.get(_rJob);
    Path path = new Path(dir);
    FileStatus[] files = fs.listStatus(path);
    Path fileToRead = null;
    // NOTE(review): suffix match - relies on the listing order to pick e.g. "...0" before "...10"
    for (FileStatus file : files)
        if (file.getPath().toString().endsWith(Integer.toString(currentPart))) {
            fileToRead = file.getPath();
            break;
        }

    if (fileToRead == null)
        throw new RuntimeException("cannot read partition " + currentPart);

    FSDataInputStream currentStream = fs.open(fileToRead);
    try {
        DoubleWritable readKey = new DoubleWritable();
        IntWritable readValue = new IntWritable();

        boolean contain0s = false;
        long numZeros = 0;
        if (currentPart == metadata.getPartitionOfZero()) {
            contain0s = true;
            numZeros = metadata.getNumberOfZero();
        }
        ReadWithZeros reader = new ReadWithZeros(currentStream, contain0s, numZeros);

        // consume key/weight pairs until the accumulated weight passes the offset
        int numRead = 0;
        while (numRead <= offset) {
            reader.readNextKeyValuePairs(readKey, readValue);
            numRead += readValue.get();
            cum_weight += readValue.get();
        }

        double ret = readKey.get();
        if (average) {
            // the loop above guarantees numRead > offset, so this holds only when the scan
            // stopped exactly at the target position; the next key is then the second operand
            if (numRead <= offset + 1) {
                reader.readNextKeyValuePairs(readKey, readValue);
                cum_weight += readValue.get();
                ret = (ret + readKey.get()) / 2;
            }
        }
        return new double[] { ret, (average ? -1 : readValue.get()), (average ? -1 : cum_weight) };
    } finally {
        // release the stream even when reading fails part-way through
        currentStream.close();
    }
}

From source file:com.ibm.bi.dml.test.utils.TestUtils.java

License:Open Source License

/**
 * Compares contents of an expected file with the actual files, where rows may be permuted.
 *
 * <p>Both inputs are read as text with one {@code "row col value"} cell per line (space
 * separated). Only the non-zero values are compared: they are collected from each side,
 * sorted, and then checked pairwise, so cell positions are ignored.
 *
 * @param expectedFile
 *            path of the file with the expected cells, opened through the same file system as
 *            the actual output
 * @param actualDir
 *            directory whose listed files together contain the actual cells
 * @param epsilon
 *            maximum absolute difference allowed between paired values
 */
public static void compareDMLMatrixWithJavaMatrixRowsOutOfOrder(String expectedFile, String actualDir,
        double epsilon) {
    try {
        FileSystem fs = FileSystem.get(conf);
        Path outDirectory = new Path(actualDir);
        Path compareFile = new Path(expectedFile);

        HashMap<CellIndex, Double> expectedValues = new HashMap<CellIndex, Double>();
        String line;
        FSDataInputStream fsin = fs.open(compareFile);
        BufferedReader compareIn = new BufferedReader(new InputStreamReader(fsin));
        try {
            while ((line = compareIn.readLine()) != null) {
                StringTokenizer st = new StringTokenizer(line, " ");
                int i = Integer.parseInt(st.nextToken());
                int j = Integer.parseInt(st.nextToken());
                double v = Double.parseDouble(st.nextToken());
                expectedValues.put(new CellIndex(i, j), v);
            }
        } finally {
            // close even if a line cannot be parsed
            compareIn.close();
        }

        HashMap<CellIndex, Double> actualValues = new HashMap<CellIndex, Double>();

        FileStatus[] outFiles = fs.listStatus(outDirectory);

        for (FileStatus file : outFiles) {
            FSDataInputStream fsout = fs.open(file.getPath());
            BufferedReader outIn = new BufferedReader(new InputStreamReader(fsout));
            try {
                while ((line = outIn.readLine()) != null) {
                    StringTokenizer st = new StringTokenizer(line, " ");
                    int i = Integer.parseInt(st.nextToken());
                    int j = Integer.parseInt(st.nextToken());
                    double v = Double.parseDouble(st.nextToken());
                    actualValues.put(new CellIndex(i, j), v);
                }
            } finally {
                outIn.close();
            }
        }

        // positions are irrelevant here, so only the non-zero values are kept
        ArrayList<Double> e_list = new ArrayList<Double>();
        for (Double expectedValue : expectedValues.values()) {
            if (expectedValue != 0.0)
                e_list.add(expectedValue);
        }

        ArrayList<Double> a_list = new ArrayList<Double>();
        for (Double actualValue : actualValues.values()) {
            if (actualValue != 0.0)
                a_list.add(actualValue);
        }

        // sorting aligns the two sides so they can be compared index by index
        Collections.sort(e_list);
        Collections.sort(a_list);

        assertTrue("Matrix nzs not equal", e_list.size() == a_list.size());
        for (int i = 0; i < e_list.size(); i++) {
            assertTrue("Matrix values not equals", Math.abs(e_list.get(i) - a_list.get(i)) <= epsilon);
        }

    } catch (IOException e) {
        fail("unable to read file: " + e.getMessage());
    }
}

From source file:com.ibm.bi.dml.test.utils.TestUtils.java

License:Open Source License

/**
 * <p>/*from w  w w.j a  v a2 s .com*/
 * Compares the expected values calculated in Java by testcase and which are
 * in the normal filesystem, with those calculated by SystemML located in
 * HDFS
 * </p>
 * 
 * @param expectedFile
 *            file with expected values, which is located in OS filesystem
 * @param actualDir
 *            file with actual values, which is located in HDFS
 * @param epsilon
 *            tolerance for value comparison
 */
public static void compareDMLMatrixWithJavaMatrix(String expectedFile, String actualDir, double epsilon) {
    try {
        FileSystem fs = FileSystem.get(conf);
        Path outDirectory = new Path(actualDir);
        Path compareFile = new Path(expectedFile);
        FSDataInputStream fsin = fs.open(compareFile);
        BufferedReader compareIn = new BufferedReader(new InputStreamReader(fsin));

        HashMap<CellIndex, Double> expectedValues = new HashMap<CellIndex, Double>();
        String line;
        while ((line = compareIn.readLine()) != null) {
            StringTokenizer st = new StringTokenizer(line, " ");
            int i = Integer.parseInt(st.nextToken());
            int j = Integer.parseInt(st.nextToken());
            double v = Double.parseDouble(st.nextToken());
            expectedValues.put(new CellIndex(i, j), v);
        }
        compareIn.close();

        HashMap<CellIndex, Double> actualValues = new HashMap<CellIndex, Double>();

        FileStatus[] outFiles = fs.listStatus(outDirectory);

        for (FileStatus file : outFiles) {
            FSDataInputStream fsout = fs.open(file.getPath());
            BufferedReader outIn = new BufferedReader(new InputStreamReader(fsout));

            while ((line = outIn.readLine()) != null) {
                StringTokenizer st = new StringTokenizer(line, " ");
                int i = Integer.parseInt(st.nextToken());
                int j = Integer.parseInt(st.nextToken());
                double v = Double.parseDouble(st.nextToken());
                actualValues.put(new CellIndex(i, j), v);
            }
            outIn.close();
        }

        int countErrors = 0;
        for (CellIndex index : expectedValues.keySet()) {
            Double expectedValue = expectedValues.get(index);
            Double actualValue = actualValues.get(index);
            if (expectedValue == null)
                expectedValue = 0.0;
            if (actualValue == null)
                actualValue = 0.0;

            //   System.out.println("actual value: "+actualValue+", expected value: "+expectedValue);

            if (!compareCellValue(expectedValue, actualValue, epsilon, false)) {
                System.out.println(expectedFile + ": " + index + " mismatch: expected " + expectedValue
                        + ", actual " + actualValue);
                countErrors++;
            }
        }
        assertTrue("for file " + actualDir + " " + countErrors + " values are not equal", countErrors == 0);
    } catch (IOException e) {
        fail("unable to read file: " + e.getMessage());
    }
}

From source file:com.ibm.bi.dml.test.utils.TestUtils.java

License:Open Source License

/**
 * Reads values from a matrix file in HDFS in DML format
 * /*w  ww  . j  a v a  2  s .c  o m*/
 * @deprecated You should not use this method, it is recommended to use the
 *             corresponding method in AutomatedTestBase
 * @param filePath
 * @return
 */
public static HashMap<CellIndex, Double> readDMLMatrixFromHDFS(String filePath) {
    HashMap<CellIndex, Double> expectedValues = new HashMap<CellIndex, Double>();

    try {
        FileSystem fs = FileSystem.get(conf);
        Path outDirectory = new Path(filePath);
        String line;

        FileStatus[] outFiles = fs.listStatus(outDirectory);
        for (FileStatus file : outFiles) {
            FSDataInputStream outIn = fs.open(file.getPath());
            BufferedReader reader = new BufferedReader(new InputStreamReader(outIn));
            while ((line = reader.readLine()) != null) {
                StringTokenizer st = new StringTokenizer(line, " ");
                int i = Integer.parseInt(st.nextToken());
                int j = Integer.parseInt(st.nextToken());
                double v = Double.parseDouble(st.nextToken());
                expectedValues.put(new CellIndex(i, j), v);
            }
            outIn.close();
        }
    } catch (IOException e) {
        assertTrue("could not read from file " + filePath, false);
    }

    return expectedValues;
}

From source file:com.ibm.bi.dml.test.utils.TestUtils.java

License:Open Source License

/**
 * Reads a scalar double value from the files listed under the given path.
 *
 * <p>Every line of every listed file is parsed as a double; the value parsed last is returned.
 *
 * @param filePath
 *            directory whose listed files contain the scalar
 * @return the last value parsed, or {@code Double.NaN} if no line was read
 */
public static double readDMLScalar(String filePath) {
    try {
        double d = Double.NaN;
        FileSystem fs = FileSystem.get(conf);
        Path outDirectory = new Path(filePath);
        String line;
        FileStatus[] outFiles = fs.listStatus(outDirectory);
        for (FileStatus file : outFiles) {
            FSDataInputStream fsout = fs.open(file.getPath());
            BufferedReader outIn = new BufferedReader(new InputStreamReader(fsout));
            try {
                while ((line = outIn.readLine()) != null) { // only 1 scalar value in file
                    d = Double.parseDouble(line);
                }
            } finally {
                // close even if the line is not a valid number
                outIn.close();
            }
        }
        return d;
    } catch (IOException e) {
        assertTrue("could not read from file " + filePath, false);
    }
    return Double.NaN;
}

From source file:com.ibm.bi.dml.test.utils.TestUtils.java

License:Open Source License

/**
 * Reads a scalar boolean value from the files listed under the given path.
 *
 * <p>Every line of every listed file is parsed with {@link Boolean#parseBoolean(String)}; the
 * value parsed last is returned. If no line was read at all, an assertion with a descriptive
 * message is raised instead of failing with a {@code NullPointerException}.
 *
 * @param filePath
 *            directory whose listed files contain the scalar
 * @return the last value parsed
 */
public static boolean readDMLBoolean(String filePath) {
    try {
        Boolean b = null;
        FileSystem fs = FileSystem.get(conf);
        Path outDirectory = new Path(filePath);
        String line;
        FileStatus[] outFiles = fs.listStatus(outDirectory);
        for (FileStatus file : outFiles) {
            FSDataInputStream fsout = fs.open(file.getPath());
            BufferedReader outIn = new BufferedReader(new InputStreamReader(fsout));
            try {
                while ((line = outIn.readLine()) != null) { // only 1 scalar value in file
                    b = Boolean.valueOf(line);
                }
            } finally {
                outIn.close();
            }
        }
        // no files, or only empty files: report it clearly rather than unboxing null
        assertTrue("no boolean value found in " + filePath, b != null);
        return b.booleanValue();
    } catch (IOException e) {
        assertTrue("could not read from file " + filePath, false);
    }
    return _AssertOccured;
}

From source file:com.ibm.bi.dml.test.utils.TestUtils.java

License:Open Source License

/**
 * Reads a scalar string value from the files listed under the given path.
 *
 * <p>Every line of every listed file is read; the line read last is returned.
 *
 * @param filePath
 *            directory whose listed files contain the scalar
 * @return the last line read, or {@code null} if no line was read
 */
public static String readDMLString(String filePath) {
    try {
        String s = null;
        FileSystem fs = FileSystem.get(conf);
        Path outDirectory = new Path(filePath);
        String line;
        FileStatus[] outFiles = fs.listStatus(outDirectory);
        for (FileStatus file : outFiles) {
            FSDataInputStream fsout = fs.open(file.getPath());
            BufferedReader outIn = new BufferedReader(new InputStreamReader(fsout));
            try {
                while ((line = outIn.readLine()) != null) { // only 1 scalar value in file
                    s = line;
                }
            } finally {
                // close even if reading fails part-way through
                outIn.close();
            }
        }
        return s;
    } catch (IOException e) {
        assertTrue("could not read from file " + filePath, false);
    }
    return null;
}

From source file:com.ibm.bi.dml.test.utils.TestUtils.java

License:Open Source License

/**
 * <p>/*from  w  ww .  j  a  v a 2 s .  c  o m*/
 * Compares a dml matrix file in HDFS with a file in normal file system
 * generated by R
 * </p>
 * 
 * @param rFile
 *            file with values calculated by R
 * @param hdfsDir
 *            file with actual values calculated by DML
 * @param epsilon
 *            tolerance for value comparison
 */
public static void compareDMLHDFSFileWithRFile(String rFile, String hdfsDir, double epsilon) {
    try {
        FileSystem fs = FileSystem.get(conf);
        Path outDirectory = new Path(hdfsDir);
        BufferedReader compareIn = new BufferedReader(new FileReader(rFile));
        HashMap<CellIndex, Double> expectedValues = new HashMap<CellIndex, Double>();
        HashMap<CellIndex, Double> actualValues = new HashMap<CellIndex, Double>();
        String line;
        /** skip both R header lines */
        compareIn.readLine();
        compareIn.readLine();
        while ((line = compareIn.readLine()) != null) {
            StringTokenizer st = new StringTokenizer(line, " ");
            int i = Integer.parseInt(st.nextToken());
            int j = Integer.parseInt(st.nextToken());
            double v = Double.parseDouble(st.nextToken());
            expectedValues.put(new CellIndex(i, j), v);
        }
        compareIn.close();

        FileStatus[] outFiles = fs.listStatus(outDirectory);

        for (FileStatus file : outFiles) {
            FSDataInputStream fsout = fs.open(file.getPath());
            BufferedReader outIn = new BufferedReader(new InputStreamReader(fsout));

            while ((line = outIn.readLine()) != null) {
                StringTokenizer st = new StringTokenizer(line, " ");
                int i = Integer.parseInt(st.nextToken());
                int j = Integer.parseInt(st.nextToken());
                double v = Double.parseDouble(st.nextToken());
                actualValues.put(new CellIndex(i, j), v);
            }
            outIn.close();
        }

        int countErrors = 0;
        for (CellIndex index : expectedValues.keySet()) {
            Double expectedValue = expectedValues.get(index);
            Double actualValue = actualValues.get(index);
            if (expectedValue == null)
                expectedValue = 0.0;
            if (actualValue == null)
                actualValue = 0.0;

            if (!compareCellValue(expectedValue, actualValue, epsilon, false))
                countErrors++;
        }
        assertTrue("for file " + hdfsDir + " " + countErrors + " values are not in equal", countErrors == 0);
    } catch (IOException e) {
        fail("unable to read file: " + e.getMessage());
    }
}

From source file:com.ibm.bi.dml.test.utils.TestUtils.java

License:Open Source License

/**
 * <p>/* w  ww .  jav  a 2s  .c o m*/
 * Checks a matrix read from a file in text format against a number of
 * specifications.
 * </p>
 * 
 * @param outDir
 *            directory containing the matrix
 * @param rows
 *            number of rows
 * @param cols
 *            number of columns
 * @param min
 *            minimum value
 * @param max
 *            maximum value
 */
public static void checkMatrix(String outDir, long rows, long cols, double min, double max) {
    try {
        FileSystem fs = FileSystem.get(conf);
        Path outDirectory = new Path(outDir);
        assertTrue(outDir + " does not exist", fs.exists(outDirectory));

        if (fs.getFileStatus(outDirectory).isDirectory()) {
            FileStatus[] outFiles = fs.listStatus(outDirectory);
            for (FileStatus file : outFiles) {
                FSDataInputStream fsout = fs.open(file.getPath());
                BufferedReader outIn = new BufferedReader(new InputStreamReader(fsout));

                String line;
                while ((line = outIn.readLine()) != null) {
                    String[] rcv = line.split(" ");
                    long row = Long.parseLong(rcv[0]);
                    long col = Long.parseLong(rcv[1]);
                    double value = Double.parseDouble(rcv[2]);
                    assertTrue("invalid row index", (row > 0 && row <= rows));
                    assertTrue("invlaid column index", (col > 0 && col <= cols));
                    assertTrue("invalid value", ((value >= min && value <= max) || value == 0));
                }
                outIn.close();
            }
        } else {
            FSDataInputStream fsout = fs.open(outDirectory);
            BufferedReader outIn = new BufferedReader(new InputStreamReader(fsout));

            String line;
            while ((line = outIn.readLine()) != null) {
                String[] rcv = line.split(" ");
                long row = Long.parseLong(rcv[0]);
                long col = Long.parseLong(rcv[1]);
                double value = Double.parseDouble(rcv[2]);
                assertTrue("invalid row index", (row > 0 && row <= rows));
                assertTrue("invlaid column index", (col > 0 && col <= cols));
                assertTrue("invalid value", ((value >= min && value <= max) || value == 0));
            }
            outIn.close();
        }
    } catch (IOException e) {
        fail("unable to read file: " + e.getMessage());
    }
}

From source file:com.ibm.bi.dml.test.utils.TestUtils.java

License:Open Source License

/**
 * <p>
 * Checks for matrix in directory existence.
 * </p>
 * <p>
 * Asserts that the directory lists exactly one file and that the first line of that file is
 * present and not empty.
 * </p>
 * 
 * @param outDir
 *            directory
 */
public static void checkForOutputExistence(String outDir) {
    try {
        FileSystem fs = FileSystem.get(conf);
        Path outDirectory = new Path(outDir);
        FileStatus[] outFiles = fs.listStatus(outDirectory);
        assertEquals("number of files in directory not 1", 1, outFiles.length);
        FSDataInputStream fsout = fs.open(outFiles[0].getPath());
        BufferedReader outIn = new BufferedReader(new InputStreamReader(fsout));

        String outLine;
        try {
            // only the first line is needed to tell whether the file has content
            outLine = outIn.readLine();
        } finally {
            outIn.close();
        }
        assertNotNull("file is empty", outLine);
        assertTrue("file is empty", outLine.length() > 0);
    } catch (IOException e) {
        fail("unable to read " + outDir + ": " + e.getMessage());
    }
}