Example usage for org.apache.hadoop.fs FileSystem open

List of usage examples for org.apache.hadoop.fs FileSystem open

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.open.

Prototype

public FSDataInputStream open(PathHandle fd) throws IOException 

Source Link

Document

Open an FSDataInputStream matching the PathHandle instance. Note that the usage examples below all call the overload that takes a Path, i.e. fs.open(path).

Usage

From source file:com.ibm.bi.dml.runtime.transform.MVImputeAgent.java

License:Open Source License

/**
 * Reads the first line of the scale metadata file of one column.
 *
 * <p>The file path is built as
 * {@code txMtdDir + "/Scale/" + agents.getName(colID) + SCALE_FILE_SUFFIX} and is passed to
 * {@code TfUtils.checkValidInputFile} before it is opened.
 *
 * @param colID    column ID handed to {@code agents.getName} to build the file name
 * @param fs       file system used to open the file
 * @param txMtdDir directory that contains the {@code Scale} sub-directory
 * @param agents   supplies the name used in the file name for {@code colID}
 * @return the first line of the file, or {@code null} if the file is empty
 * @throws IOException if the file cannot be opened or read
 */
public String readScaleLine(int colID, FileSystem fs, Path txMtdDir, TfUtils agents) throws IOException {
    Path path = new Path(txMtdDir + "/Scale/" + agents.getName(colID) + SCALE_FILE_SUFFIX);
    TfUtils.checkValidInputFile(fs, path, true);
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
    try {
        return br.readLine();
    } finally {
        // close even when readLine() throws, so the underlying stream is not leaked
        br.close();
    }
}

From source file:com.ibm.bi.dml.runtime.transform.RecodeAgent.java

License:Open Source License

/**
 * Method to load recode maps of all attributes, at once.
 *
 * <p>For every column ID in {@code _rcdList} the file
 * {@code txMtdDir + "/Recode/" + agents.getName(colID) + RCD_MAP_FILE_SUFFIX} is read and the
 * parsed map is stored in {@code _finalMaps} under that column ID. Nothing happens when
 * {@code _rcdList} is {@code null}.
 *
 * @param job      not used by this method
 * @param fs       file system used to read the recode map files
 * @param txMtdDir directory that contains the {@code Recode} sub-directory
 * @param agents   supplies the name used in the file name for each column ID
 * @throws IOException      if a recode map file cannot be opened or read
 * @throws RuntimeException if {@code txMtdDir} is not a directory
 */
@Override
public void loadTxMtd(JobConf job, FileSystem fs, Path txMtdDir, TfUtils agents) throws IOException {
    if (_rcdList == null)
        return;

    _finalMaps = new HashMap<Integer, HashMap<String, String>>();

    if (!fs.isDirectory(txMtdDir)) {
        // NOTE(review): this closes the caller-supplied file system before failing;
        // kept as in the original -- confirm that callers do not reuse fs afterwards.
        fs.close();
        throw new RuntimeException("Path to recode maps must be a directory: " + txMtdDir);
    }

    for (int i = 0; i < _rcdList.length; i++) {
        int colID = _rcdList[i];

        Path path = new Path(txMtdDir + "/Recode/" + agents.getName(colID) + RCD_MAP_FILE_SUFFIX);
        TfUtils.checkValidInputFile(fs, path, true);

        HashMap<String, String> map = new HashMap<String, String>();

        BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
        try {
            String line;

            // Example line to parse: "WN (1)67492",1,61975
            while ((line = br.readLine()) != null) {

                // the quoted word ends at the last occurrence of a quotation mark
                int idxQuote = line.lastIndexOf('"');
                String word = UtilFunctions.unquote(line.substring(0, idxQuote + 1));

                // the recode index starts after the quote and one separator character
                // and runs up to the next separator; a line without that separator
                // still fails with StringIndexOutOfBoundsException (from substring)
                int start = idxQuote + 2;
                int end = line.indexOf(TXMTD_SEP.charAt(0), start);
                String rcdIndex = line.substring(start, end);

                map.put(word, rcdIndex);
            }
        } finally {
            // close even when reading or parsing throws, so the stream is not leaked
            br.close();
        }
        _finalMaps.put(colID, map);
    }
}

From source file:com.ibm.bi.dml.runtime.transform.TfUtils.java

License:Open Source License

/**
 * Opens {@code specFile} on the given file system and parses its content with
 * {@code JSONHelper.parse}.
 *
 * @param fs       file system used to open the file
 * @param specFile path of the specification file
 * @return the object returned by {@code JSONHelper.parse}
 * @throws IOException if the file cannot be opened, read or closed
 */
public static JSONObject readSpec(FileSystem fs, String specFile) throws IOException {
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(new Path(specFile))));
    try {
        return JSONHelper.parse(br);
    } finally {
        // close even when parsing throws, so the stream is not leaked
        br.close();
    }
}

From source file:com.ibm.bi.dml.runtime.util.MapReduceTool.java

License:Open Source License

/**
 * Opens {@code filename} on the file system obtained from {@code _rJob} and wraps the
 * stream in a buffered character reader.
 *
 * <p>The reader is returned open; this method does not close it.
 *
 * @param filename path of the file to open
 * @return a buffered reader over the opened file
 * @throws IOException if the file system cannot be obtained or the file cannot be opened
 */
private static BufferedReader setupInputFile(String filename) throws IOException {
    final Path inputPath = new Path(filename);
    final InputStreamReader decoder = new InputStreamReader(FileSystem.get(_rJob).open(inputPath));
    return new BufferedReader(decoder);
}

From source file:com.ibm.bi.dml.runtime.util.MapReduceTool.java

License:Open Source License

/**
 * Picks the value found at relative position {@code p} among the weighted items stored in the
 * partition files under {@code dir}.
 *
 * <p>The per-partition item counts from {@code metadata} are turned into cumulative ranges to
 * find the partition that contains position {@code ceil(total * p)}. The file in {@code dir}
 * whose path ends with that partition number is then read through {@code ReadWithZeros} until
 * the accumulated weights pass the offset inside the partition.
 *
 * @param dir      directory that holds the partition files
 * @param metadata supplies the item count per partition and the zero partition/count
 * @param p        fraction of the total item count that selects the position
 *                 (presumably a quantile in [0, 1] -- confirm with callers)
 * @param average  if {@code true} and the total count is even, the picked key may be
 *                 averaged with the next key read
 * @return a three-element array: the picked (or averaged) key, then the last weight read and
 *         the accumulated weight -- the last two are {@code -1} when averaging is in effect
 * @throws IOException      if listing, opening or reading the partition file fails
 * @throws RuntimeException if no file in {@code dir} ends with the partition number
 */
public static double[] pickValueWeight(String dir, NumItemsByEachReducerMetaData metadata, double p,
        boolean average) throws IOException {
    long[] counts = metadata.getNumItemsArray();
    long[] ranges = new long[counts.length];
    ranges[0] = counts[0];
    for (int i = 1; i < counts.length; i++)
        ranges[i] = ranges[i - 1] + counts[i];

    long total = ranges[ranges.length - 1];

    // do averaging only if it is asked for; and sum_wt is even
    average = average && (total % 2 == 0);

    int currentPart = 0;
    double cum_weight = 0;
    long pos = (long) Math.ceil(total * p);
    while (ranges[currentPart] < pos) {
        currentPart++;
        // NOTE(review): this adds the cumulative range of the partition just entered,
        // not the count of the partition just skipped -- kept as is, confirm intent.
        cum_weight += ranges[currentPart];
    }
    int offset;
    if (currentPart > 0)
        offset = (int) (pos - ranges[currentPart - 1] - 1);
    else
        offset = (int) pos - 1;

    FileSystem fs = FileSystem.get(_rJob);
    Path path = new Path(dir);
    FileStatus[] files = fs.listStatus(path);
    Path fileToRead = null;
    for (FileStatus file : files)
        if (file.getPath().toString().endsWith(Integer.toString(currentPart))) {
            fileToRead = file.getPath();
            break;
        }

    if (fileToRead == null)
        throw new RuntimeException("cannot read partition " + currentPart);

    FSDataInputStream currentStream = fs.open(fileToRead);
    try {
        DoubleWritable readKey = new DoubleWritable();
        IntWritable readValue = new IntWritable();

        boolean contain0s = false;
        long numZeros = 0;
        if (currentPart == metadata.getPartitionOfZero()) {
            contain0s = true;
            numZeros = metadata.getNumberOfZero();
        }
        ReadWithZeros reader = new ReadWithZeros(currentStream, contain0s, numZeros);

        int numRead = 0;
        while (numRead <= offset) {
            reader.readNextKeyValuePairs(readKey, readValue);
            numRead += readValue.get();
            cum_weight += readValue.get();
        }

        double ret = readKey.get();
        if (average) {
            if (numRead <= offset + 1) {
                reader.readNextKeyValuePairs(readKey, readValue);
                cum_weight += readValue.get();
                ret = (ret + readKey.get()) / 2;
            }
        }
        return new double[] { ret, (average ? -1 : readValue.get()), (average ? -1 : cum_weight) };
    } finally {
        // close even when reading throws, so the partition stream is not leaked
        currentStream.close();
    }
}

From source file:com.ibm.bi.dml.test.utils.TestUtils.java

License:Open Source License

/**
 * Asserts that the first line of {@code actualFile} equals the first line of
 * {@code expectedFile}.
 *
 * <p>The comparison is an exact string comparison; {@code epsilon} is not used. An empty
 * actual file fails the assertion. An I/O error is reported through {@code fail}.
 *
 * @param expectedFile file holding the expected scalar
 * @param actualFile   file holding the actual scalar
 * @param epsilon      not used by this method
 */
public static void compareDMLScalarWithJavaScalar(String expectedFile, String actualFile, double epsilon) {
    try {
        FileSystem fs = FileSystem.get(conf);

        String lineExpected;
        BufferedReader compareIn = new BufferedReader(new InputStreamReader(fs.open(new Path(expectedFile))));
        try {
            lineExpected = compareIn.readLine();
        } finally {
            // close even when readLine() throws, so the stream is not leaked
            compareIn.close();
        }

        String lineActual;
        BufferedReader outIn = new BufferedReader(new InputStreamReader(fs.open(new Path(actualFile))));
        try {
            lineActual = outIn.readLine();
        } finally {
            outIn.close();
        }

        // lineActual is null for an empty file: report that as a failed comparison
        // with the descriptive message instead of a NullPointerException
        assertTrue(expectedFile + ": " + lineExpected + " vs " + actualFile + ": " + lineActual,
                lineActual != null && lineActual.equals(lineExpected));
    } catch (IOException e) {
        fail("unable to read file: " + e.getMessage());
    }
}

From source file:com.ibm.bi.dml.test.utils.TestUtils.java

License:Open Source License

/**
 * Compares contents of an expected file with the actual file, where rows may be permuted.
 *
 * <p>Both sides are read as lines of space-separated {@code row col value} cells; the actual
 * side is every file listed under {@code actualDir}. The non-zero values of each side are
 * sorted and compared pairwise, so only the multiset of non-zero values is checked, not the
 * cell positions. An I/O error is reported through {@code fail}.
 *
 * @param expectedFile file with the expected cells
 * @param actualDir    directory whose files hold the actual cells
 * @param epsilon      maximum allowed absolute difference between paired values
 */
public static void compareDMLMatrixWithJavaMatrixRowsOutOfOrder(String expectedFile, String actualDir,
        double epsilon) {
    try {
        FileSystem fs = FileSystem.get(conf);
        Path outDirectory = new Path(actualDir);
        Path compareFile = new Path(expectedFile);

        HashMap<CellIndex, Double> expectedValues = new HashMap<CellIndex, Double>();
        String line;

        BufferedReader compareIn = new BufferedReader(new InputStreamReader(fs.open(compareFile)));
        try {
            while ((line = compareIn.readLine()) != null) {
                StringTokenizer st = new StringTokenizer(line, " ");
                int i = Integer.parseInt(st.nextToken());
                int j = Integer.parseInt(st.nextToken());
                double v = Double.parseDouble(st.nextToken());
                expectedValues.put(new CellIndex(i, j), v);
            }
        } finally {
            // close even when parsing throws, so the stream is not leaked
            compareIn.close();
        }

        HashMap<CellIndex, Double> actualValues = new HashMap<CellIndex, Double>();

        FileStatus[] outFiles = fs.listStatus(outDirectory);

        for (FileStatus file : outFiles) {
            BufferedReader outIn = new BufferedReader(new InputStreamReader(fs.open(file.getPath())));
            try {
                while ((line = outIn.readLine()) != null) {
                    StringTokenizer st = new StringTokenizer(line, " ");
                    int i = Integer.parseInt(st.nextToken());
                    int j = Integer.parseInt(st.nextToken());
                    double v = Double.parseDouble(st.nextToken());
                    actualValues.put(new CellIndex(i, j), v);
                }
            } finally {
                outIn.close();
            }
        }

        // keep only the non-zero values of each side
        ArrayList<Double> e_list = new ArrayList<Double>();
        for (Double expectedValue : expectedValues.values()) {
            if (expectedValue != 0.0)
                e_list.add(expectedValue);
        }

        ArrayList<Double> a_list = new ArrayList<Double>();
        for (Double actualValue : actualValues.values()) {
            if (actualValue != 0.0)
                a_list.add(actualValue);
        }

        // sorting makes the comparison independent of the cell positions
        Collections.sort(e_list);
        Collections.sort(a_list);

        assertTrue("Matrix nzs not equal", e_list.size() == a_list.size());
        for (int i = 0; i < e_list.size(); i++) {
            assertTrue("Matrix values not equals", Math.abs(e_list.get(i) - a_list.get(i)) <= epsilon);
        }

    } catch (IOException e) {
        fail("unable to read file: " + e.getMessage());
    }
}

From source file:com.ibm.bi.dml.test.utils.TestUtils.java

License:Open Source License

/**
 * <p>
 * Compares the expected values calculated in Java by testcase and which are
 * in the normal filesystem, with those calculated by SystemML located in
 * HDFS with Matrix Market format
 * </p>
 *
 * <p>
 * In both files the first line is skipped, the second line is read as
 * space-separated rows, cols and nnz, and every further line is read as a
 * space-separated {@code row col value} cell. A rows/cols/nnz mismatch is only
 * printed; the assertion counts the expected cells whose actual value differs
 * according to {@code compareCellValue}. A cell missing from the actual file is
 * compared as 0.0. An I/O error is reported through {@code fail}.
 * </p>
 *
 * @param expectedFile
 *            file with expected values, which is located in OS filesystem
 * @param actualDir
 *            file with actual values, which is located in HDFS
 * @param epsilon
 *            tolerance for value comparison
 */
public static void compareMMMatrixWithJavaMatrix(String expectedFile, String actualDir, double epsilon) {
    try {
        FileSystem fs = FileSystem.get(conf);
        Path outDirectory = new Path(actualDir);
        Path compareFile = new Path(expectedFile);

        HashMap<CellIndex, Double> expectedValues = new HashMap<CellIndex, Double>();
        String line;
        String[] expRcn;

        BufferedReader compareIn = new BufferedReader(new InputStreamReader(fs.open(compareFile)));
        try {
            // skip the header of Matrix Market file
            compareIn.readLine();

            // rows, cols and nnz
            line = compareIn.readLine();
            expRcn = line.split(" ");

            while ((line = compareIn.readLine()) != null) {
                StringTokenizer st = new StringTokenizer(line, " ");
                int i = Integer.parseInt(st.nextToken());
                int j = Integer.parseInt(st.nextToken());
                double v = Double.parseDouble(st.nextToken());
                expectedValues.put(new CellIndex(i, j), v);
            }
        } finally {
            // close even when parsing throws, so the stream is not leaked
            compareIn.close();
        }

        HashMap<CellIndex, Double> actualValues = new HashMap<CellIndex, Double>();

        BufferedReader outIn = new BufferedReader(new InputStreamReader(fs.open(outDirectory)));
        try {
            //skip MM header
            outIn.readLine();

            //rows, cols and nnz
            line = outIn.readLine();
            String[] rcn = line.split(" ");

            if (Integer.parseInt(expRcn[0]) != Integer.parseInt(rcn[0])) {
                System.out.println(" Rows mismatch: expected " + Integer.parseInt(expRcn[0]) + ", actual "
                        + Integer.parseInt(rcn[0]));
            } else if (Integer.parseInt(expRcn[1]) != Integer.parseInt(rcn[1])) {
                System.out.println(" Cols mismatch: expected " + Integer.parseInt(expRcn[1]) + ", actual "
                        + Integer.parseInt(rcn[1]));
            } else if (Integer.parseInt(expRcn[2]) != Integer.parseInt(rcn[2])) {
                System.out.println(" Nnz mismatch: expected " + Integer.parseInt(expRcn[2]) + ", actual "
                        + Integer.parseInt(rcn[2]));
            }

            while ((line = outIn.readLine()) != null) {
                StringTokenizer st = new StringTokenizer(line, " ");
                int i = Integer.parseInt(st.nextToken());
                int j = Integer.parseInt(st.nextToken());
                double v = Double.parseDouble(st.nextToken());
                actualValues.put(new CellIndex(i, j), v);
            }
        } finally {
            // the actual-values reader was previously never closed, not even on success
            outIn.close();
        }

        int countErrors = 0;
        for (CellIndex index : expectedValues.keySet()) {
            Double expectedValue = expectedValues.get(index);
            Double actualValue = actualValues.get(index);
            if (expectedValue == null)
                expectedValue = 0.0;
            if (actualValue == null)
                actualValue = 0.0;

            if (!compareCellValue(expectedValue, actualValue, epsilon, false)) {
                System.out.println(expectedFile + ": " + index + " mismatch: expected " + expectedValue
                        + ", actual " + actualValue);
                countErrors++;
            }
        }
        assertTrue("for file " + actualDir + " " + countErrors + " values are not equal", countErrors == 0);
    } catch (IOException e) {
        fail("unable to read file: " + e.getMessage());
    }
}

From source file:com.ibm.bi.dml.test.utils.TestUtils.java

License:Open Source License

/**
 * <p>
 * Compares the expected values calculated in Java by testcase and which are
 * in the normal filesystem, with those calculated by SystemML located in
 * HDFS
 * </p>
 *
 * <p>
 * Both sides are read as lines of space-separated {@code row col value} cells;
 * the actual side is every file listed under {@code actualDir}. The assertion
 * counts the expected cells whose actual value differs according to
 * {@code compareCellValue}. A cell missing from the actual files is compared as
 * 0.0; cells that exist only in the actual files are not checked. An I/O error
 * is reported through {@code fail}.
 * </p>
 *
 * @param expectedFile
 *            file with expected values, which is located in OS filesystem
 * @param actualDir
 *            file with actual values, which is located in HDFS
 * @param epsilon
 *            tolerance for value comparison
 */
public static void compareDMLMatrixWithJavaMatrix(String expectedFile, String actualDir, double epsilon) {
    try {
        FileSystem fs = FileSystem.get(conf);
        Path outDirectory = new Path(actualDir);
        Path compareFile = new Path(expectedFile);

        HashMap<CellIndex, Double> expectedValues = new HashMap<CellIndex, Double>();
        String line;

        BufferedReader compareIn = new BufferedReader(new InputStreamReader(fs.open(compareFile)));
        try {
            while ((line = compareIn.readLine()) != null) {
                StringTokenizer st = new StringTokenizer(line, " ");
                int i = Integer.parseInt(st.nextToken());
                int j = Integer.parseInt(st.nextToken());
                double v = Double.parseDouble(st.nextToken());
                expectedValues.put(new CellIndex(i, j), v);
            }
        } finally {
            // close even when parsing throws, so the stream is not leaked
            compareIn.close();
        }

        HashMap<CellIndex, Double> actualValues = new HashMap<CellIndex, Double>();

        FileStatus[] outFiles = fs.listStatus(outDirectory);

        for (FileStatus file : outFiles) {
            BufferedReader outIn = new BufferedReader(new InputStreamReader(fs.open(file.getPath())));
            try {
                while ((line = outIn.readLine()) != null) {
                    StringTokenizer st = new StringTokenizer(line, " ");
                    int i = Integer.parseInt(st.nextToken());
                    int j = Integer.parseInt(st.nextToken());
                    double v = Double.parseDouble(st.nextToken());
                    actualValues.put(new CellIndex(i, j), v);
                }
            } finally {
                outIn.close();
            }
        }

        int countErrors = 0;
        for (CellIndex index : expectedValues.keySet()) {
            Double expectedValue = expectedValues.get(index);
            Double actualValue = actualValues.get(index);
            if (expectedValue == null)
                expectedValue = 0.0;
            if (actualValue == null)
                actualValue = 0.0;

            if (!compareCellValue(expectedValue, actualValue, epsilon, false)) {
                System.out.println(expectedFile + ": " + index + " mismatch: expected " + expectedValue
                        + ", actual " + actualValue);
                countErrors++;
            }
        }
        assertTrue("for file " + actualDir + " " + countErrors + " values are not equal", countErrors == 0);
    } catch (IOException e) {
        fail("unable to read file: " + e.getMessage());
    }
}

From source file:com.ibm.bi.dml.test.utils.TestUtils.java

License:Open Source License

/**
 * Reads values from a matrix file in HDFS in DML format
 *
 * <p>Every file listed under {@code filePath} is read as lines of space-separated
 * {@code row col value} cells. An I/O error fails an assertion instead of being thrown.
 *
 * @deprecated You should not use this method, it is recommended to use the
 *             corresponding method in AutomatedTestBase
 * @param filePath directory whose files are read
 * @return map from cell index to value for all cells read; a later line for the same
 *         index overwrites an earlier one
 */
public static HashMap<CellIndex, Double> readDMLMatrixFromHDFS(String filePath) {
    HashMap<CellIndex, Double> expectedValues = new HashMap<CellIndex, Double>();

    try {
        FileSystem fs = FileSystem.get(conf);
        Path outDirectory = new Path(filePath);
        String line;

        FileStatus[] outFiles = fs.listStatus(outDirectory);
        for (FileStatus file : outFiles) {
            BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(file.getPath())));
            try {
                while ((line = reader.readLine()) != null) {
                    StringTokenizer st = new StringTokenizer(line, " ");
                    int i = Integer.parseInt(st.nextToken());
                    int j = Integer.parseInt(st.nextToken());
                    double v = Double.parseDouble(st.nextToken());
                    expectedValues.put(new CellIndex(i, j), v);
                }
            } finally {
                // closing the reader also closes the wrapped stream; done in finally
                // so a parse failure does not leak it
                reader.close();
            }
        }
    } catch (IOException e) {
        assertTrue("could not read from file " + filePath, false);
    }

    return expectedValues;
}