edu.emory.bmi.medicurator.storage.HdfsStorage.java Source code

Java tutorial

Introduction

Here is the source code for edu.emory.bmi.medicurator.storage.HdfsStorage.java

Source

/*
 * Title:        Medicurator
 * Description:  Near duplicate detection framework for heterogeneous medical data sources
 * Licence:      Apache License Version 2.0 - http://www.apache.org/licenses/
 *
 * Copyright (c) 2016, Yiru Chen <chen1ru@pku.edu.cn>
 */
package edu.emory.bmi.medicurator.storage;

import edu.emory.bmi.medicurator.Constants;

import java.io.*;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.hdfs.*;

/**
 * Implementation of {@link Storage} backed by HDFS.
 *
 * Every relative path handed to this class is appended to
 * {@code Constants.HDFS_BASEDIR} and resolved against the file system
 * identified by {@code Constants.HDFS_URI}.
 *
 * Errors are reported on standard output and signalled to the caller through
 * the return value ({@code false} / {@code null}); no exception is propagated.
 */
public class HdfsStorage implements Storage, Serializable {
    // Base directory that every relative path is appended to.
    private String hdfsPath = Constants.HDFS_BASEDIR;

    /**
     * Save the data read from an InputStream to a relative path.
     * Required parent directories are created if they do not exist, and an
     * existing file at the destination is overwritten.
     *
     * The supplied stream is read until end-of-stream but is NOT closed here;
     * it remains owned by the caller.
     *
     * @param path relative path (appended to the HDFS base directory)
     * @param in   source of the bytes to store
     * @return {@code true} if the whole stream was written, {@code false} if
     *         any exception occurred (the exception is printed to stdout)
     */
    public boolean saveToPath(String path, InputStream in) {
        try {
            Configuration conf = new Configuration();
            // NOTE: FileSystem.get normally hands out a cached instance that is
            // shared by every caller in the JVM (unless caching is disabled in
            // the Hadoop configuration). It is therefore deliberately not closed
            // here: closing it would also break streams that loadFromPath has
            // already returned to other callers.
            FileSystem hdfs = FileSystem.get(new URI(Constants.HDFS_URI), conf);
            Path dst = new Path(hdfsPath + path);

            // Create the parent directories, then (over)write the file.
            hdfs.mkdirs(dst.getParent());
            // try-with-resources guarantees the output stream is closed even
            // when reading from 'in' or writing to HDFS fails part-way.
            try (FSDataOutputStream out = hdfs.create(dst, true)) {
                byte[] buffer = new byte[1024];
                int numBytes;
                // read() signals end-of-stream with -1; a 0-length read is not EOF.
                while ((numBytes = in.read(buffer)) != -1) {
                    out.write(buffer, 0, numBytes);
                }
            }
            return true;
        } catch (Exception e) {
            System.out.println("[ERROR]  Exception occurs when HdfsStorage saveToPath (" + path + ")" + e);
        }
        return false;
    }

    /**
     * Get the InputStream of a file with the specified relative path.
     *
     * The caller is responsible for closing the returned stream.
     *
     * @param path relative path (appended to the HDFS base directory)
     * @return an open InputStream on the file, or {@code null} if the file does
     *         not exist or an exception occurred (a message is printed to stdout)
     */
    public InputStream loadFromPath(String path) {
        InputStream in = null;
        try {
            Configuration conf = new Configuration();
            // The FileSystem must stay open for as long as the returned stream
            // is in use, so it is intentionally not closed in this method.
            FileSystem hdfs = FileSystem.get(new URI(Constants.HDFS_URI), conf);
            Path hdfspath = new Path(hdfsPath + path);
            if (!hdfs.exists(hdfspath)) {
                System.out.println("File does not exists");
                return in;
            }
            in = hdfs.open(hdfspath);
        } catch (Exception e) {
            System.out.println("[ERROR] Exception occurs when HdfsStorage loadFromPath (" + path + ")" + e);
        }
        return in;
    }
}