List of usage examples for org.apache.hadoop.fs FileSystem open
public FSDataInputStream open(Path f) throws IOException
From source file:com.ibm.bi.dml.runtime.transform.MVImputeAgent.java
License:Open Source License
/**
 * Reads the first line of the scale metadata file of a column.
 *
 * <p>The file location is built as
 * {@code txMtdDir + "/Scale/" + agents.getName(colID) + SCALE_FILE_SUFFIX}
 * and handed to {@code TfUtils.checkValidInputFile} before it is opened.
 *
 * @param colID    column ID that is passed to {@code agents.getName} to build the file name
 * @param fs       file system used to open the metadata file
 * @param txMtdDir directory under which the {@code Scale} sub-directory is expected
 * @param agents   helper used to translate the column ID into a name
 * @return the first line of the file, or {@code null} if the file has no lines
 * @throws IOException if opening, reading, or closing the file fails
 */
public String readScaleLine(int colID, FileSystem fs, Path txMtdDir, TfUtils agents) throws IOException {
    Path path = new Path(txMtdDir + "/Scale/" + agents.getName(colID) + SCALE_FILE_SUFFIX);
    TfUtils.checkValidInputFile(fs, path, true);

    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
    try {
        return br.readLine();
    } finally {
        // Close in finally so the stream is released even when readLine() throws.
        br.close();
    }
}
From source file:com.ibm.bi.dml.runtime.transform.RecodeAgent.java
License:Open Source License
/** * Method to load recode maps of all attributes, at once. * // www. j a v a 2 s . c o m * @param job * @throws IOException */ @Override public void loadTxMtd(JobConf job, FileSystem fs, Path txMtdDir, TfUtils agents) throws IOException { if (_rcdList == null) return; _finalMaps = new HashMap<Integer, HashMap<String, String>>(); if (fs.isDirectory(txMtdDir)) { for (int i = 0; i < _rcdList.length; i++) { int colID = _rcdList[i]; Path path = new Path(txMtdDir + "/Recode/" + agents.getName(colID) + RCD_MAP_FILE_SUFFIX); TfUtils.checkValidInputFile(fs, path, true); HashMap<String, String> map = new HashMap<String, String>(); BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path))); String line = null, word = null; String rcdIndex = null; // Example line to parse: "WN (1)67492",1,61975 while ((line = br.readLine()) != null) { // last occurrence of quotation mark int idxQuote = line.lastIndexOf('"'); word = UtilFunctions.unquote(line.substring(0, idxQuote + 1)); int idx = idxQuote + 2; while (line.charAt(idx) != TXMTD_SEP.charAt(0)) idx++; rcdIndex = line.substring(idxQuote + 2, idx); map.put(word, rcdIndex); } br.close(); _finalMaps.put(colID, map); } } else { fs.close(); throw new RuntimeException("Path to recode maps must be a directory: " + txMtdDir); } }
From source file:com.ibm.bi.dml.runtime.transform.TfUtils.java
License:Open Source License
public static JSONObject readSpec(FileSystem fs, String specFile) throws IOException { BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(new Path(specFile)))); JSONObject obj = JSONHelper.parse(br); br.close();// w ww . jav a2 s . co m return obj; }
From source file:com.ibm.bi.dml.runtime.util.MapReduceTool.java
License:Open Source License
private static BufferedReader setupInputFile(String filename) throws IOException { Path pt = new Path(filename); FileSystem fs = FileSystem.get(_rJob); BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(pt))); return br;//w ww . j a v a 2 s . c o m }
From source file:com.ibm.bi.dml.runtime.util.MapReduceTool.java
License:Open Source License
public static double[] pickValueWeight(String dir, NumItemsByEachReducerMetaData metadata, double p, boolean average) throws IOException { long[] counts = metadata.getNumItemsArray(); long[] ranges = new long[counts.length]; ranges[0] = counts[0];//from www.j a va 2 s. c o m for (int i = 1; i < counts.length; i++) ranges[i] = ranges[i - 1] + counts[i]; long total = ranges[ranges.length - 1]; // do averaging only if it is asked for; and sum_wt is even average = average && (total % 2 == 0); int currentPart = 0; double cum_weight = 0; long pos = (long) Math.ceil(total * p); while (ranges[currentPart] < pos) { currentPart++; cum_weight += ranges[currentPart]; } int offset; if (currentPart > 0) offset = (int) (pos - ranges[currentPart - 1] - 1); else offset = (int) pos - 1; FileSystem fs = FileSystem.get(_rJob); Path path = new Path(dir); FileStatus[] files = fs.listStatus(path); Path fileToRead = null; for (FileStatus file : files) if (file.getPath().toString().endsWith(Integer.toString(currentPart))) { fileToRead = file.getPath(); break; } if (fileToRead == null) throw new RuntimeException("cannot read partition " + currentPart); FSDataInputStream currentStream = fs.open(fileToRead); DoubleWritable readKey = new DoubleWritable(); IntWritable readValue = new IntWritable(); boolean contain0s = false; long numZeros = 0; if (currentPart == metadata.getPartitionOfZero()) { contain0s = true; numZeros = metadata.getNumberOfZero(); } ReadWithZeros reader = new ReadWithZeros(currentStream, contain0s, numZeros); int numRead = 0; while (numRead <= offset) { reader.readNextKeyValuePairs(readKey, readValue); numRead += readValue.get(); cum_weight += readValue.get(); } double ret = readKey.get(); if (average) { if (numRead <= offset + 1) { reader.readNextKeyValuePairs(readKey, readValue); cum_weight += readValue.get(); ret = (ret + readKey.get()) / 2; } } currentStream.close(); return new double[] { ret, (average ? -1 : readValue.get()), (average ? -1 : cum_weight) }; }
From source file:com.ibm.bi.dml.test.utils.TestUtils.java
License:Open Source License
/**
 * Compares the first line of the expected file with the first line of the
 * actual file and asserts that they are equal as strings.
 *
 * <p>Both files are opened through the file system obtained from {@code conf}.
 * An empty actual file is reported as an assertion failure. An
 * {@link IOException} while reading is reported through {@code fail}.
 *
 * @param expectedFile path of the file holding the expected scalar
 * @param actualFile   path of the file holding the actual scalar
 * @param epsilon      not used; the comparison is an exact string comparison
 */
public static void compareDMLScalarWithJavaScalar(String expectedFile, String actualFile, double epsilon) {
    try {
        FileSystem fs = FileSystem.get(conf);

        String lineExpected;
        Path compareFile = new Path(expectedFile);
        BufferedReader compareIn = new BufferedReader(new InputStreamReader(fs.open(compareFile)));
        try {
            lineExpected = compareIn.readLine();
        } finally {
            compareIn.close();
        }

        String lineActual;
        Path outFile = new Path(actualFile);
        BufferedReader outIn = new BufferedReader(new InputStreamReader(fs.open(outFile)));
        try {
            lineActual = outIn.readLine();
        } finally {
            outIn.close();
        }

        // readLine() returns null for an empty file; guard so that case fails the
        // assertion with a readable message instead of a NullPointerException.
        assertTrue(expectedFile + ": " + lineExpected + " vs " + actualFile + ": " + lineActual,
                lineActual != null && lineActual.equals(lineExpected));
    } catch (IOException e) {
        fail("unable to read file: " + e.getMessage());
    }
}
From source file:com.ibm.bi.dml.test.utils.TestUtils.java
License:Open Source License
/** * Compares contents of an expected file with the actual file, where rows may be permuted * @param expectedFile//from w ww . j a v a2 s. co m * @param actualDir * @param epsilon */ public static void compareDMLMatrixWithJavaMatrixRowsOutOfOrder(String expectedFile, String actualDir, double epsilon) { try { FileSystem fs = FileSystem.get(conf); Path outDirectory = new Path(actualDir); Path compareFile = new Path(expectedFile); FSDataInputStream fsin = fs.open(compareFile); BufferedReader compareIn = new BufferedReader(new InputStreamReader(fsin)); HashMap<CellIndex, Double> expectedValues = new HashMap<CellIndex, Double>(); String line; while ((line = compareIn.readLine()) != null) { StringTokenizer st = new StringTokenizer(line, " "); int i = Integer.parseInt(st.nextToken()); int j = Integer.parseInt(st.nextToken()); double v = Double.parseDouble(st.nextToken()); expectedValues.put(new CellIndex(i, j), v); } compareIn.close(); HashMap<CellIndex, Double> actualValues = new HashMap<CellIndex, Double>(); FileStatus[] outFiles = fs.listStatus(outDirectory); for (FileStatus file : outFiles) { FSDataInputStream fsout = fs.open(file.getPath()); BufferedReader outIn = new BufferedReader(new InputStreamReader(fsout)); while ((line = outIn.readLine()) != null) { StringTokenizer st = new StringTokenizer(line, " "); int i = Integer.parseInt(st.nextToken()); int j = Integer.parseInt(st.nextToken()); double v = Double.parseDouble(st.nextToken()); actualValues.put(new CellIndex(i, j), v); } outIn.close(); } ArrayList<Double> e_list = new ArrayList<Double>(); for (CellIndex index : expectedValues.keySet()) { Double expectedValue = expectedValues.get(index); if (expectedValue != 0.0) e_list.add(expectedValue); } ArrayList<Double> a_list = new ArrayList<Double>(); for (CellIndex index : actualValues.keySet()) { Double actualValue = actualValues.get(index); if (actualValue != 0.0) a_list.add(actualValue); } Collections.sort(e_list); Collections.sort(a_list); assertTrue("Matrix nzs 
not equal", e_list.size() == a_list.size()); for (int i = 0; i < e_list.size(); i++) { assertTrue("Matrix values not equals", Math.abs(e_list.get(i) - a_list.get(i)) <= epsilon); } } catch (IOException e) { fail("unable to read file: " + e.getMessage()); } }
From source file:com.ibm.bi.dml.test.utils.TestUtils.java
License:Open Source License
/** * <p>// ww w . ja v a 2 s .c o m * Compares the expected values calculated in Java by testcase and which are * in the normal filesystem, with those calculated by SystemML located in * HDFS with Matrix Market format * </p> * * @param expectedFile * file with expected values, which is located in OS filesystem * @param actualDir * file with actual values, which is located in HDFS * @param epsilon * tolerance for value comparison */ public static void compareMMMatrixWithJavaMatrix(String expectedFile, String actualDir, double epsilon) { try { FileSystem fs = FileSystem.get(conf); Path outDirectory = new Path(actualDir); Path compareFile = new Path(expectedFile); FSDataInputStream fsin = fs.open(compareFile); BufferedReader compareIn = new BufferedReader(new InputStreamReader(fsin)); HashMap<CellIndex, Double> expectedValues = new HashMap<CellIndex, Double>(); // skip the header of Matrix Market file String line = compareIn.readLine(); // rows, cols and nnz line = compareIn.readLine(); String[] expRcn = line.split(" "); while ((line = compareIn.readLine()) != null) { StringTokenizer st = new StringTokenizer(line, " "); int i = Integer.parseInt(st.nextToken()); int j = Integer.parseInt(st.nextToken()); double v = Double.parseDouble(st.nextToken()); expectedValues.put(new CellIndex(i, j), v); } compareIn.close(); HashMap<CellIndex, Double> actualValues = new HashMap<CellIndex, Double>(); FSDataInputStream fsout = fs.open(outDirectory); BufferedReader outIn = new BufferedReader(new InputStreamReader(fsout)); //skip MM header line = outIn.readLine(); //rows, cols and nnz line = outIn.readLine(); String[] rcn = line.split(" "); if (Integer.parseInt(expRcn[0]) != Integer.parseInt(rcn[0])) { System.out.println(" Rows mismatch: expected " + Integer.parseInt(expRcn[0]) + ", actual " + Integer.parseInt(rcn[0])); } else if (Integer.parseInt(expRcn[1]) != Integer.parseInt(rcn[1])) { System.out.println(" Cols mismatch: expected " + Integer.parseInt(expRcn[1]) + ", actual " + 
Integer.parseInt(rcn[1])); } else if (Integer.parseInt(expRcn[2]) != Integer.parseInt(rcn[2])) { System.out.println(" Nnz mismatch: expected " + Integer.parseInt(expRcn[2]) + ", actual " + Integer.parseInt(rcn[2])); } while ((line = outIn.readLine()) != null) { StringTokenizer st = new StringTokenizer(line, " "); int i = Integer.parseInt(st.nextToken()); int j = Integer.parseInt(st.nextToken()); double v = Double.parseDouble(st.nextToken()); actualValues.put(new CellIndex(i, j), v); } int countErrors = 0; for (CellIndex index : expectedValues.keySet()) { Double expectedValue = expectedValues.get(index); Double actualValue = actualValues.get(index); if (expectedValue == null) expectedValue = 0.0; if (actualValue == null) actualValue = 0.0; // System.out.println("actual value: "+actualValue+", expected value: "+expectedValue); if (!compareCellValue(expectedValue, actualValue, epsilon, false)) { System.out.println(expectedFile + ": " + index + " mismatch: expected " + expectedValue + ", actual " + actualValue); countErrors++; } } assertTrue("for file " + actualDir + " " + countErrors + " values are not equal", countErrors == 0); } catch (IOException e) { fail("unable to read file: " + e.getMessage()); } }
From source file:com.ibm.bi.dml.test.utils.TestUtils.java
License:Open Source License
/**
 * <p>
 * Compares the expected values calculated in Java by testcase and which are
 * in the normal filesystem, with those calculated by SystemML located in
 * HDFS
 * </p>
 *
 * <p>
 * Every line of the expected file and of every file in the actual directory
 * is parsed as {@code <int> <int> <double>} separated by spaces. Every
 * expected cell is then compared with the actual cell at the same index
 * through {@code compareCellValue}; a missing actual cell counts as
 * {@code 0.0}. The assertion fails when at least one cell differs.
 * </p>
 *
 * @param expectedFile
 *            file with expected values, which is located in OS filesystem
 * @param actualDir
 *            file with actual values, which is located in HDFS
 * @param epsilon
 *            tolerance for value comparison
 */
public static void compareDMLMatrixWithJavaMatrix(String expectedFile, String actualDir, double epsilon) {
    try {
        FileSystem fs = FileSystem.get(conf);
        Path outDirectory = new Path(actualDir);
        Path compareFile = new Path(expectedFile);

        HashMap<CellIndex, Double> expectedValues = new HashMap<CellIndex, Double>();
        BufferedReader compareIn = new BufferedReader(new InputStreamReader(fs.open(compareFile)));
        try {
            String line;
            while ((line = compareIn.readLine()) != null) {
                StringTokenizer st = new StringTokenizer(line, " ");
                int i = Integer.parseInt(st.nextToken());
                int j = Integer.parseInt(st.nextToken());
                double v = Double.parseDouble(st.nextToken());
                expectedValues.put(new CellIndex(i, j), v);
            }
        } finally {
            // Release the stream even when a malformed line makes parsing throw.
            compareIn.close();
        }

        HashMap<CellIndex, Double> actualValues = new HashMap<CellIndex, Double>();
        FileStatus[] outFiles = fs.listStatus(outDirectory);
        for (FileStatus file : outFiles) {
            BufferedReader outIn = new BufferedReader(new InputStreamReader(fs.open(file.getPath())));
            try {
                String line;
                while ((line = outIn.readLine()) != null) {
                    StringTokenizer st = new StringTokenizer(line, " ");
                    int i = Integer.parseInt(st.nextToken());
                    int j = Integer.parseInt(st.nextToken());
                    double v = Double.parseDouble(st.nextToken());
                    actualValues.put(new CellIndex(i, j), v);
                }
            } finally {
                outIn.close();
            }
        }

        int countErrors = 0;
        for (CellIndex index : expectedValues.keySet()) {
            Double expectedValue = expectedValues.get(index);
            Double actualValue = actualValues.get(index);
            if (expectedValue == null) {
                expectedValue = 0.0;
            }
            if (actualValue == null) {
                actualValue = 0.0;
            }

            if (!compareCellValue(expectedValue, actualValue, epsilon, false)) {
                System.out.println(expectedFile + ": " + index + " mismatch: expected " + expectedValue
                        + ", actual " + actualValue);
                countErrors++;
            }
        }
        assertTrue("for file " + actualDir + " " + countErrors + " values are not equal", countErrors == 0);
    } catch (IOException e) {
        fail("unable to read file: " + e.getMessage());
    }
}
From source file:com.ibm.bi.dml.test.utils.TestUtils.java
License:Open Source License
/**
 * Reads values from a matrix file in HDFS in DML format.
 *
 * <p>{@code filePath} is listed through the file system obtained from
 * {@code conf}; every line of every listed file is parsed as
 * {@code <int> <int> <double>} separated by spaces and stored under the
 * corresponding {@code CellIndex}. An {@link IOException} is reported as an
 * assertion failure that includes the exception message.
 *
 * @deprecated You should not use this method, it is recommended to use the
 *             corresponding method in AutomatedTestBase
 * @param filePath path that is listed for the matrix files
 * @return map from cell index to the value read for that cell
 */
public static HashMap<CellIndex, Double> readDMLMatrixFromHDFS(String filePath) {
    HashMap<CellIndex, Double> expectedValues = new HashMap<CellIndex, Double>();

    try {
        FileSystem fs = FileSystem.get(conf);
        Path outDirectory = new Path(filePath);

        FileStatus[] outFiles = fs.listStatus(outDirectory);
        for (FileStatus file : outFiles) {
            BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(file.getPath())));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    StringTokenizer st = new StringTokenizer(line, " ");
                    int i = Integer.parseInt(st.nextToken());
                    int j = Integer.parseInt(st.nextToken());
                    double v = Double.parseDouble(st.nextToken());
                    expectedValues.put(new CellIndex(i, j), v);
                }
            } finally {
                // Closing the reader also closes the underlying stream, and does so
                // even when a malformed line makes parsing throw.
                reader.close();
            }
        }
    } catch (IOException e) {
        // Keep the cause visible in the failure message instead of dropping it.
        assertTrue("could not read from file " + filePath + ": " + e.getMessage(), false);
    }

    return expectedValues;
}