List of usage examples for org.apache.hadoop.fs FileSystem listStatus
public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException
From source file:com.ibm.bi.dml.runtime.util.MapReduceTool.java
License:Open Source License
public static double[] pickValueWeight(String dir, NumItemsByEachReducerMetaData metadata, double p, boolean average) throws IOException { long[] counts = metadata.getNumItemsArray(); long[] ranges = new long[counts.length]; ranges[0] = counts[0];//from w w w . ja v a 2 s . co m for (int i = 1; i < counts.length; i++) ranges[i] = ranges[i - 1] + counts[i]; long total = ranges[ranges.length - 1]; // do averaging only if it is asked for; and sum_wt is even average = average && (total % 2 == 0); int currentPart = 0; double cum_weight = 0; long pos = (long) Math.ceil(total * p); while (ranges[currentPart] < pos) { currentPart++; cum_weight += ranges[currentPart]; } int offset; if (currentPart > 0) offset = (int) (pos - ranges[currentPart - 1] - 1); else offset = (int) pos - 1; FileSystem fs = FileSystem.get(_rJob); Path path = new Path(dir); FileStatus[] files = fs.listStatus(path); Path fileToRead = null; for (FileStatus file : files) if (file.getPath().toString().endsWith(Integer.toString(currentPart))) { fileToRead = file.getPath(); break; } if (fileToRead == null) throw new RuntimeException("cannot read partition " + currentPart); FSDataInputStream currentStream = fs.open(fileToRead); DoubleWritable readKey = new DoubleWritable(); IntWritable readValue = new IntWritable(); boolean contain0s = false; long numZeros = 0; if (currentPart == metadata.getPartitionOfZero()) { contain0s = true; numZeros = metadata.getNumberOfZero(); } ReadWithZeros reader = new ReadWithZeros(currentStream, contain0s, numZeros); int numRead = 0; while (numRead <= offset) { reader.readNextKeyValuePairs(readKey, readValue); numRead += readValue.get(); cum_weight += readValue.get(); } double ret = readKey.get(); if (average) { if (numRead <= offset + 1) { reader.readNextKeyValuePairs(readKey, readValue); cum_weight += readValue.get(); ret = (ret + readKey.get()) / 2; } } currentStream.close(); return new double[] { ret, (average ? -1 : readValue.get()), (average ? -1 : cum_weight) }; }
From source file:com.ibm.bi.dml.test.utils.TestUtils.java
License:Open Source License
/** * Compares contents of an expected file with the actual file, where rows may be permuted * @param expectedFile//ww w . j a v a 2 s . co m * @param actualDir * @param epsilon */ public static void compareDMLMatrixWithJavaMatrixRowsOutOfOrder(String expectedFile, String actualDir, double epsilon) { try { FileSystem fs = FileSystem.get(conf); Path outDirectory = new Path(actualDir); Path compareFile = new Path(expectedFile); FSDataInputStream fsin = fs.open(compareFile); BufferedReader compareIn = new BufferedReader(new InputStreamReader(fsin)); HashMap<CellIndex, Double> expectedValues = new HashMap<CellIndex, Double>(); String line; while ((line = compareIn.readLine()) != null) { StringTokenizer st = new StringTokenizer(line, " "); int i = Integer.parseInt(st.nextToken()); int j = Integer.parseInt(st.nextToken()); double v = Double.parseDouble(st.nextToken()); expectedValues.put(new CellIndex(i, j), v); } compareIn.close(); HashMap<CellIndex, Double> actualValues = new HashMap<CellIndex, Double>(); FileStatus[] outFiles = fs.listStatus(outDirectory); for (FileStatus file : outFiles) { FSDataInputStream fsout = fs.open(file.getPath()); BufferedReader outIn = new BufferedReader(new InputStreamReader(fsout)); while ((line = outIn.readLine()) != null) { StringTokenizer st = new StringTokenizer(line, " "); int i = Integer.parseInt(st.nextToken()); int j = Integer.parseInt(st.nextToken()); double v = Double.parseDouble(st.nextToken()); actualValues.put(new CellIndex(i, j), v); } outIn.close(); } ArrayList<Double> e_list = new ArrayList<Double>(); for (CellIndex index : expectedValues.keySet()) { Double expectedValue = expectedValues.get(index); if (expectedValue != 0.0) e_list.add(expectedValue); } ArrayList<Double> a_list = new ArrayList<Double>(); for (CellIndex index : actualValues.keySet()) { Double actualValue = actualValues.get(index); if (actualValue != 0.0) a_list.add(actualValue); } Collections.sort(e_list); Collections.sort(a_list); assertTrue("Matrix nzs 
not equal", e_list.size() == a_list.size()); for (int i = 0; i < e_list.size(); i++) { assertTrue("Matrix values not equals", Math.abs(e_list.get(i) - a_list.get(i)) <= epsilon); } } catch (IOException e) { fail("unable to read file: " + e.getMessage()); } }
From source file:com.ibm.bi.dml.test.utils.TestUtils.java
License:Open Source License
/** * <p>/*from w w w.j a v a2 s .com*/ * Compares the expected values calculated in Java by testcase and which are * in the normal filesystem, with those calculated by SystemML located in * HDFS * </p> * * @param expectedFile * file with expected values, which is located in OS filesystem * @param actualDir * file with actual values, which is located in HDFS * @param epsilon * tolerance for value comparison */ public static void compareDMLMatrixWithJavaMatrix(String expectedFile, String actualDir, double epsilon) { try { FileSystem fs = FileSystem.get(conf); Path outDirectory = new Path(actualDir); Path compareFile = new Path(expectedFile); FSDataInputStream fsin = fs.open(compareFile); BufferedReader compareIn = new BufferedReader(new InputStreamReader(fsin)); HashMap<CellIndex, Double> expectedValues = new HashMap<CellIndex, Double>(); String line; while ((line = compareIn.readLine()) != null) { StringTokenizer st = new StringTokenizer(line, " "); int i = Integer.parseInt(st.nextToken()); int j = Integer.parseInt(st.nextToken()); double v = Double.parseDouble(st.nextToken()); expectedValues.put(new CellIndex(i, j), v); } compareIn.close(); HashMap<CellIndex, Double> actualValues = new HashMap<CellIndex, Double>(); FileStatus[] outFiles = fs.listStatus(outDirectory); for (FileStatus file : outFiles) { FSDataInputStream fsout = fs.open(file.getPath()); BufferedReader outIn = new BufferedReader(new InputStreamReader(fsout)); while ((line = outIn.readLine()) != null) { StringTokenizer st = new StringTokenizer(line, " "); int i = Integer.parseInt(st.nextToken()); int j = Integer.parseInt(st.nextToken()); double v = Double.parseDouble(st.nextToken()); actualValues.put(new CellIndex(i, j), v); } outIn.close(); } int countErrors = 0; for (CellIndex index : expectedValues.keySet()) { Double expectedValue = expectedValues.get(index); Double actualValue = actualValues.get(index); if (expectedValue == null) expectedValue = 0.0; if (actualValue == null) actualValue = 0.0; // 
System.out.println("actual value: "+actualValue+", expected value: "+expectedValue); if (!compareCellValue(expectedValue, actualValue, epsilon, false)) { System.out.println(expectedFile + ": " + index + " mismatch: expected " + expectedValue + ", actual " + actualValue); countErrors++; } } assertTrue("for file " + actualDir + " " + countErrors + " values are not equal", countErrors == 0); } catch (IOException e) { fail("unable to read file: " + e.getMessage()); } }
From source file:com.ibm.bi.dml.test.utils.TestUtils.java
License:Open Source License
/** * Reads values from a matrix file in HDFS in DML format * /*w ww . j a v a 2 s .c o m*/ * @deprecated You should not use this method, it is recommended to use the * corresponding method in AutomatedTestBase * @param filePath * @return */ public static HashMap<CellIndex, Double> readDMLMatrixFromHDFS(String filePath) { HashMap<CellIndex, Double> expectedValues = new HashMap<CellIndex, Double>(); try { FileSystem fs = FileSystem.get(conf); Path outDirectory = new Path(filePath); String line; FileStatus[] outFiles = fs.listStatus(outDirectory); for (FileStatus file : outFiles) { FSDataInputStream outIn = fs.open(file.getPath()); BufferedReader reader = new BufferedReader(new InputStreamReader(outIn)); while ((line = reader.readLine()) != null) { StringTokenizer st = new StringTokenizer(line, " "); int i = Integer.parseInt(st.nextToken()); int j = Integer.parseInt(st.nextToken()); double v = Double.parseDouble(st.nextToken()); expectedValues.put(new CellIndex(i, j), v); } outIn.close(); } } catch (IOException e) { assertTrue("could not read from file " + filePath, false); } return expectedValues; }
From source file:com.ibm.bi.dml.test.utils.TestUtils.java
License:Open Source License
public static double readDMLScalar(String filePath) { FileSystem fs; try {//from w w w .ja v a 2s. c o m double d = Double.NaN; fs = FileSystem.get(conf); Path outDirectory = new Path(filePath); String line; FileStatus[] outFiles = fs.listStatus(outDirectory); for (FileStatus file : outFiles) { FSDataInputStream fsout = fs.open(file.getPath()); BufferedReader outIn = new BufferedReader(new InputStreamReader(fsout)); while ((line = outIn.readLine()) != null) { // only 1 scalar value in file d = Double.parseDouble(line); } outIn.close(); } return d; } catch (IOException e) { assertTrue("could not read from file " + filePath, false); } return Double.NaN; }
From source file:com.ibm.bi.dml.test.utils.TestUtils.java
License:Open Source License
public static boolean readDMLBoolean(String filePath) { FileSystem fs; try {/*ww w . j a v a 2 s. co m*/ Boolean b = null; fs = FileSystem.get(conf); Path outDirectory = new Path(filePath); String line; FileStatus[] outFiles = fs.listStatus(outDirectory); for (FileStatus file : outFiles) { FSDataInputStream fsout = fs.open(file.getPath()); BufferedReader outIn = new BufferedReader(new InputStreamReader(fsout)); while ((line = outIn.readLine()) != null) { // only 1 scalar value in file b = Boolean.valueOf(Boolean.parseBoolean(line)); } outIn.close(); } return b.booleanValue(); } catch (IOException e) { assertTrue("could not read from file " + filePath, false); } return _AssertOccured; }
From source file:com.ibm.bi.dml.test.utils.TestUtils.java
License:Open Source License
public static String readDMLString(String filePath) { FileSystem fs; try {/*from w w w . ja v a2 s . com*/ String s = null; fs = FileSystem.get(conf); Path outDirectory = new Path(filePath); String line; FileStatus[] outFiles = fs.listStatus(outDirectory); for (FileStatus file : outFiles) { FSDataInputStream fsout = fs.open(file.getPath()); BufferedReader outIn = new BufferedReader(new InputStreamReader(fsout)); while ((line = outIn.readLine()) != null) { // only 1 scalar value in file s = line; } outIn.close(); } return s; } catch (IOException e) { assertTrue("could not read from file " + filePath, false); } return null; }
From source file:com.ibm.bi.dml.test.utils.TestUtils.java
License:Open Source License
/** * <p>/*from w ww . j a v a 2 s . c o m*/ * Compares a dml matrix file in HDFS with a file in normal file system * generated by R * </p> * * @param rFile * file with values calculated by R * @param hdfsDir * file with actual values calculated by DML * @param epsilon * tolerance for value comparison */ public static void compareDMLHDFSFileWithRFile(String rFile, String hdfsDir, double epsilon) { try { FileSystem fs = FileSystem.get(conf); Path outDirectory = new Path(hdfsDir); BufferedReader compareIn = new BufferedReader(new FileReader(rFile)); HashMap<CellIndex, Double> expectedValues = new HashMap<CellIndex, Double>(); HashMap<CellIndex, Double> actualValues = new HashMap<CellIndex, Double>(); String line; /** skip both R header lines */ compareIn.readLine(); compareIn.readLine(); while ((line = compareIn.readLine()) != null) { StringTokenizer st = new StringTokenizer(line, " "); int i = Integer.parseInt(st.nextToken()); int j = Integer.parseInt(st.nextToken()); double v = Double.parseDouble(st.nextToken()); expectedValues.put(new CellIndex(i, j), v); } compareIn.close(); FileStatus[] outFiles = fs.listStatus(outDirectory); for (FileStatus file : outFiles) { FSDataInputStream fsout = fs.open(file.getPath()); BufferedReader outIn = new BufferedReader(new InputStreamReader(fsout)); while ((line = outIn.readLine()) != null) { StringTokenizer st = new StringTokenizer(line, " "); int i = Integer.parseInt(st.nextToken()); int j = Integer.parseInt(st.nextToken()); double v = Double.parseDouble(st.nextToken()); actualValues.put(new CellIndex(i, j), v); } outIn.close(); } int countErrors = 0; for (CellIndex index : expectedValues.keySet()) { Double expectedValue = expectedValues.get(index); Double actualValue = actualValues.get(index); if (expectedValue == null) expectedValue = 0.0; if (actualValue == null) actualValue = 0.0; if (!compareCellValue(expectedValue, actualValue, epsilon, false)) countErrors++; } assertTrue("for file " + hdfsDir + " " + countErrors + " 
values are not in equal", countErrors == 0); } catch (IOException e) { fail("unable to read file: " + e.getMessage()); } }
From source file:com.ibm.bi.dml.test.utils.TestUtils.java
License:Open Source License
/** * <p>/* w ww . jav a 2s .c o m*/ * Checks a matrix read from a file in text format against a number of * specifications. * </p> * * @param outDir * directory containing the matrix * @param rows * number of rows * @param cols * number of columns * @param min * minimum value * @param max * maximum value */ public static void checkMatrix(String outDir, long rows, long cols, double min, double max) { try { FileSystem fs = FileSystem.get(conf); Path outDirectory = new Path(outDir); assertTrue(outDir + " does not exist", fs.exists(outDirectory)); if (fs.getFileStatus(outDirectory).isDirectory()) { FileStatus[] outFiles = fs.listStatus(outDirectory); for (FileStatus file : outFiles) { FSDataInputStream fsout = fs.open(file.getPath()); BufferedReader outIn = new BufferedReader(new InputStreamReader(fsout)); String line; while ((line = outIn.readLine()) != null) { String[] rcv = line.split(" "); long row = Long.parseLong(rcv[0]); long col = Long.parseLong(rcv[1]); double value = Double.parseDouble(rcv[2]); assertTrue("invalid row index", (row > 0 && row <= rows)); assertTrue("invlaid column index", (col > 0 && col <= cols)); assertTrue("invalid value", ((value >= min && value <= max) || value == 0)); } outIn.close(); } } else { FSDataInputStream fsout = fs.open(outDirectory); BufferedReader outIn = new BufferedReader(new InputStreamReader(fsout)); String line; while ((line = outIn.readLine()) != null) { String[] rcv = line.split(" "); long row = Long.parseLong(rcv[0]); long col = Long.parseLong(rcv[1]); double value = Double.parseDouble(rcv[2]); assertTrue("invalid row index", (row > 0 && row <= rows)); assertTrue("invlaid column index", (col > 0 && col <= cols)); assertTrue("invalid value", ((value >= min && value <= max) || value == 0)); } outIn.close(); } } catch (IOException e) { fail("unable to read file: " + e.getMessage()); } }
From source file:com.ibm.bi.dml.test.utils.TestUtils.java
License:Open Source License
/**
 * <p>
 * Checks for matrix in directory existence.
 * </p>
 * <p>
 * Asserts that the directory lists exactly one file and that the first line
 * of that file is present and non-empty.
 * </p>
 *
 * @param outDir
 *            directory
 */
public static void checkForOutputExistence(String outDir) {
    try {
        FileSystem fs = FileSystem.get(conf);
        Path outDirectory = new Path(outDir);
        FileStatus[] outFiles = fs.listStatus(outDirectory);
        assertEquals("number of files in directory not 1", 1, outFiles.length);

        String outLine;
        BufferedReader outIn = new BufferedReader(new InputStreamReader(fs.open(outFiles[0].getPath())));
        try {
            outLine = outIn.readLine();
        } finally {
            // close even when the read fails
            outIn.close();
        }

        assertNotNull("file is empty", outLine);
        assertTrue("file is empty", outLine.length() > 0);
    } catch (IOException e) {
        fail("unable to read " + outDir + ": " + e.getMessage());
    }
}