io.hops.hopsworks.api.zeppelin.notebook.repo.HDFSNotebookRepo.java Source code

Java tutorial

Introduction

Here is the source code for io.hops.hopsworks.api.zeppelin.notebook.repo.HDFSNotebookRepo.java

Source

/*
 * Changes to this file committed after and not including commit-id: ccc0d2c5f9a5ac661e60e6eaf138de7889928b8b
 * are released under the following license:
 *
 * This file is part of Hopsworks
 * Copyright (C) 2018, Logical Clocks AB. All rights reserved
 *
 * Hopsworks is free software: you can redistribute it and/or modify it under the terms of
 * the GNU Affero General Public License as published by the Free Software Foundation,
 * either version 3 of the License, or (at your option) any later version.
 *
 * Hopsworks is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE.  See the GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License along with this program.
 * If not, see <https://www.gnu.org/licenses/>.
 *
 * Changes to this file committed before and including commit-id: ccc0d2c5f9a5ac661e60e6eaf138de7889928b8b
 * are released under the following license:
 *
 * Copyright (C) 2013 - 2018, Logical Clocks AB and RISE SICS AB. All rights reserved
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this
 * software and associated documentation files (the "Software"), to deal in the Software
 * without restriction, including without limitation the rights to use, copy, modify, merge,
 * publish, distribute, sublicense, and/or sell copies of the Software, and to permit
 * persons to whom the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all copies or
 * substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS  OR IMPLIED, INCLUDING
 * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR  OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

package io.hops.hopsworks.api.zeppelin.notebook.repo;

import com.google.common.collect.Lists;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Collections;
import java.util.Date;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import io.hops.hopsworks.common.hdfs.DistributedFileSystemOps;
import io.hops.hopsworks.common.hdfs.DistributedFsService;
import io.hops.hopsworks.common.hdfs.HdfsUsersController;
import io.hops.hopsworks.common.util.Settings;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Options;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.zeppelin.conf.ZeppelinConfiguration;
import org.apache.zeppelin.notebook.Note;
import org.apache.zeppelin.notebook.NoteInfo;
import org.apache.zeppelin.notebook.NotebookImportDeserializer;
import org.apache.zeppelin.notebook.Paragraph;
import org.apache.zeppelin.notebook.repo.NotebookRepo;
import org.apache.zeppelin.notebook.repo.NotebookRepoSettingsInfo;
import org.apache.zeppelin.scheduler.Job;
import org.apache.zeppelin.user.AuthenticationInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.naming.InitialContext;
import javax.naming.NamingException;

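/**
 * {@link NotebookRepo} implementation that stores Zeppelin notebooks on HDFS.
 * Each note is kept in its own directory as {@code [NOTE_ID]/note.json}.
 */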
public class HDFSNotebookRepo implements NotebookRepo {

    private final Logger logger = LoggerFactory.getLogger(HDFSNotebookRepo.class);

    private URI filesystemRoot;
    private final ZeppelinConfiguration conf;
    private String hdfsUser;
    private final Configuration hdfsConf;
    private final DistributedFsService dfsService;
    private final UserGroupInformation superuser;
    private final Pattern psuPattern = Pattern.compile("(\\w*)" + HdfsUsersController.USER_NAME_DELIMITER + "\\w*");
    private final Pattern pguPattern = Pattern.compile("(\\w*)" + Settings.PROJECT_GENERIC_USER_SUFFIX);

    /**
     * Builds the repository: loads the Hadoop configuration, records the login user as
     * superuser, resolves the {@code DistributedFsService} through JNDI and finally
     * validates and stores the notebook directory taken from {@code conf}.
     *
     * @param conf the Zeppelin configuration supplying the notebook directory
     * @throws IOException if the JNDI lookup fails or the notebook directory cannot be set
     */
    public HDFSNotebookRepo(ZeppelinConfiguration conf) throws IOException {
        this.conf = conf;
        this.hdfsConf = getHadoopConf();
        this.superuser = UserGroupInformation.getLoginUser();
        try {
            // The service lives in the "common" module of the same application, so its
            // global JNDI name is derived from this module's name, e.g.
            // java:global/<app>/<common-module>/DistributedFsService
            final String appName = InitialContext.doLookup("java:app/AppName");
            final String apiModuleName = InitialContext.doLookup("java:module/ModuleName");
            final String commonModuleName = apiModuleName.replace("api", "common");
            final String serviceJndiName = "java:global/" + appName + "/" + commonModuleName
                    + "/DistributedFsService";
            this.dfsService = InitialContext.doLookup(serviceJndiName);
        } catch (NamingException ex) {
            throw new IOException(ex);
        }
        setNotebookDirectory(conf.getNotebookDir());
    }

    /**
     * Validates {@code notebookDir} and, only if it exists on the filesystem, makes it the
     * repository root and records the current user's short name as {@code hdfsUser}.
     * <p>
     * The new values are committed after validation so that a failed call (for example
     * from {@code updateSettings}) leaves the previously configured root untouched.
     *
     * @param notebookDir URI string of the notebook directory
     * @throws IOException if {@code notebookDir} is not a valid URI or the directory does not exist
     */
    private void setNotebookDirectory(String notebookDir) throws IOException {
        final URI candidateRoot;
        try {
            candidateRoot = new URI(notebookDir);
        } catch (URISyntaxException e1) {
            throw new IOException(e1);
        }
        final String currentUser = UserGroupInformation.getCurrentUser().getShortUserName();

        DistributedFileSystemOps dfso = null;
        try {
            // The existence check is done with the superuser's filesystem handle.
            dfso = getDfs(superuser);
            Path path = new Path(candidateRoot.getPath());
            if (!dfso.getFilesystem().exists(path)) {
                logger.info("Notebook dir does not exist.");
                throw new IOException("Notebook dir does not exist.");
            }
        } finally {
            if (null != dfso) {
                dfso.close();
            }
        }

        // Commit only once the directory is known to exist.
        this.filesystemRoot = candidateRoot;
        this.hdfsUser = currentUser;
    }

    /**
     * @return the path component of the configured notebook root URI
     */
    private String getNotebookDirPath() {
        final String rootPath = filesystemRoot.getPath();
        return rootPath;
    }

    /**
     * Builds the Hadoop client configuration from {@code core-site.xml}, {@code yarn-site.xml}
     * and {@code hdfs-site.xml} found under {@code <hadoop dir>/etc/hadoop}. The Hadoop directory
     * is read from the {@code HADOOP_HOME} system property and falls back to {@code /srv/hadoop}.
     * The umask is forced to {@code 000}.
     *
     * @return the assembled configuration
     * @throws IllegalStateException if any of the three configuration files is missing
     */
    private Configuration getHadoopConf() {
        //get this from variables table
        String hadoopDir = System.getProperty("HADOOP_HOME");
        if (hadoopDir == null) {
            hadoopDir = "/srv/hadoop";
        }
        String hadoopConfDir = hadoopDir + "/etc/hadoop";

        // Checked in the same order as before: hdfs-site, core-site, yarn-site.
        File hdfsConfFile = requireConfFile(hadoopConfDir, "hdfs-site.xml", "hdfs");
        File hadoopConfFile = requireConfFile(hadoopConfDir, "core-site.xml", "hadoop");
        File yarnConfFile = requireConfFile(hadoopConfDir, "yarn-site.xml", "yarn");

        //Set the Configuration object for the hdfs client
        Path yarnPath = new Path(yarnConfFile.getAbsolutePath());
        Path hdfsPath = new Path(hdfsConfFile.getAbsolutePath());
        Path hadoopPath = new Path(hadoopConfFile.getAbsolutePath());
        Configuration hdfsConfig = new Configuration();
        hdfsConfig.addResource(hadoopPath);
        hdfsConfig.addResource(yarnPath);
        hdfsConfig.addResource(hdfsPath);
        hdfsConfig.set("fs.permissions.umask-mode", "000");
        return hdfsConfig;
    }

    /**
     * Returns the configuration file {@code fileName} inside {@code confDir}, failing if it
     * does not exist.
     *
     * @param confDir  directory expected to contain the file
     * @param fileName name of the configuration file
     * @param label    short name of the component, used only in the exception message
     * @return the existing file
     * @throws IllegalStateException if the file does not exist
     */
    private File requireConfFile(String confDir, String fileName, String label) {
        File confFile = new File(confDir, fileName);
        if (!confFile.exists()) {
            // SLF4J placeholders are "{}"; "{0}" is printed literally and drops the argument.
            logger.error("Unable to locate configuration file in {}", confFile);
            throw new IllegalStateException("No " + label + " conf file: " + fileName);
        }
        return confFile;
    }

    /**
     * Obtains a filesystem handle from the DFS service for the given user.
     *
     * @param ugi the user to act as; may be {@code null}
     * @return the service's default handle when {@code ugi} names the superuser, a handle
     *         requested by user name otherwise, or {@code null} when {@code ugi} is {@code null}
     */
    private DistributedFileSystemOps getDfs(UserGroupInformation ugi) {
        if (ugi == null) {
            return null;
        }
        final boolean actsAsSuperuser = ugi.getUserName().equals(superuser.getUserName());
        if (actsAsSuperuser) {
            return dfsService.getDfsOps();
        }
        return dfsService.getDfsOps(ugi.getUserName());
    }

    /**
     * Resolves {@code path} against the notebook root URI.
     *
     * @param path a path relative to the root, with or without a leading slash; may be
     *             {@code null} or blank
     * @return the root URI string for a {@code null}/blank path, otherwise the root URI string
     *         followed by {@code path}, with exactly one slash inserted when {@code path}
     *         does not already start with one
     */
    private String getPath(String path) {
        final String root = filesystemRoot.toString();
        if (path == null || path.trim().isEmpty()) {
            return root;
        }
        final String separator = path.startsWith("/") ? "" : "/";
        return root + separator + path;
    }

    /**
     * Returns the notebook root as a {@code Path} after checking it on the given filesystem.
     *
     * @param dfs filesystem handle used for the checks
     * @return the root directory path
     * @throws IOException if the root does not exist or is not a directory
     */
    private Path getRootDir(DistributedFileSystemOps dfs) throws IOException {
        final Path root = new Path(getPath("/"));

        final boolean rootExists = dfs.getFilesystem().exists(root);
        if (!rootExists) {
            throw new IOException("Root path does not exists");
        }

        if (dfs.getFilesystem().isDirectory(root)) {
            return root;
        }
        throw new IOException("Root path is not a directory");
    }

    /**
     * Loads the note stored as {@code note.json} inside {@code noteDir}.
     * <p>
     * Paragraphs whose persisted status is {@code PENDING} or {@code RUNNING} are switched
     * to {@code ABORT} on the returned note.
     *
     * @param noteDir directory of the note
     * @param dfs     filesystem handle used to read the note
     * @return the deserialized note, never {@code null}
     * @throws IOException if {@code noteDir} is not a directory, {@code note.json} is missing,
     *                     reading fails, or the file does not contain a note
     */
    private Note getNote(Path noteDir, DistributedFileSystemOps dfs) throws IOException {
        if (!dfs.getFilesystem().isDirectory(noteDir)) {
            throw new IOException(noteDir.toString() + " is not a directory");
        }

        Path noteJson = new Path(noteDir, "note.json");
        if (!dfs.getFilesystem().exists(noteJson)) {
            throw new IOException(noteJson.toString() + " not found");
        }

        // try-with-resources: the stream is closed even when reading fails.
        String json;
        try (InputStream ins = dfs.open(noteJson)) {
            json = IOUtils.toString(ins, conf.getString(ZeppelinConfiguration.ConfVars.ZEPPELIN_ENCODING));
        }

        GsonBuilder gsonBuilder = new GsonBuilder();
        gsonBuilder.setPrettyPrinting();
        Gson gson = gsonBuilder.registerTypeAdapter(Date.class, new NotebookImportDeserializer()).create();

        Note note = gson.fromJson(json, Note.class);
        if (note == null) {
            // Gson yields null for empty input; report it as an I/O problem instead of an NPE below.
            throw new IOException(noteJson.toString() + " is empty or does not contain a note");
        }

        for (Paragraph p : note.getParagraphs()) {
            if (p.getStatus() == Job.Status.PENDING || p.getStatus() == Job.Status.RUNNING) {
                p.setStatus(Job.Status.ABORT);
            }
        }

        return note;
    }

    /**
     * Loads the note in {@code noteDir} and wraps it in a {@code NoteInfo}.
     *
     * @param noteDir directory of the note
     * @param dfs     filesystem handle used to read the note
     * @return info object built from the loaded note
     * @throws IOException if the note cannot be loaded
     */
    private NoteInfo getNoteInfo(Path noteDir, DistributedFileSystemOps dfs) throws IOException {
        return new NoteInfo(getNote(noteDir, dfs));
    }

    /**
     * Chooses the filesystem handle to write {@code path} with, so that an existing path keeps
     * its current owner.
     * <p>
     * If {@code path} exists and is owned by somebody other than {@code subject}, a handle
     * acting as that owner is returned; in every other case {@code dfs} itself is returned.
     *
     * @param path    the path about to be written
     * @param subject the requesting user; may be {@code null}, in which case {@code dfs} is returned
     * @param dfs     the handle already opened for the requesting user
     * @return {@code dfs}, or a separate handle for the path's owner
     * @throws IOException if the filesystem lookup or proxy-user creation fails
     */
    private DistributedFileSystemOps getDistributedFs(Path path, AuthenticationInfo subject,
            DistributedFileSystemOps dfs) throws IOException {
        if (subject == null) {
            // Without a subject there is nobody to compare the owner with; previously this
            // path threw a NullPointerException when the target did not exist yet.
            return dfs;
        }
        final String subjectUser = subject.getUser();
        String owner;
        if (dfs.getFilesystem().exists(path)) {
            owner = dfs.getFileStatus(path).getOwner();
        } else {
            owner = subjectUser;
        }
        if (owner.equals(subjectUser)) {
            return dfs;
        }
        return getDfs(UserGroupInformation.createProxyUser(owner, UserGroupInformation.getLoginUser()));
    }

    /**
     * Opens a filesystem handle acting as the given subject.
     *
     * @param subject the requesting user; when {@code null} or named {@code "anonymous"} the
     *                repository's own {@code hdfsUser} is used instead
     * @return a handle for a proxy user created on top of the login user
     * @throws IOException if the login user cannot be determined
     */
    private DistributedFileSystemOps getUserDfs(AuthenticationInfo subject) throws IOException {
        final boolean useRepoUser = subject == null || "anonymous".equals(subject.getUser());
        final String proxyUserName = useRepoUser ? this.hdfsUser : subject.getUser();
        final UserGroupInformation proxyUser =
                UserGroupInformation.createProxyUser(proxyUserName, UserGroupInformation.getLoginUser());
        return getDfs(proxyUser);
    }

    /**
     * Hands the given filesystem handle back to the DFS service for closing.
     *
     * @param dfso the handle to close
     */
    private void closeDfsClient(DistributedFileSystemOps dfso) {
        this.dfsService.closeDfsClient(dfso);
    }

    /**
     * Lists the notes found directly under the notebook root.
     * <p>
     * Entries whose name starts with {@code .}, {@code #} or {@code ~} and entries that are not
     * directories are ignored. A note that cannot be read is logged and left out of the result.
     *
     * @param subject the requesting user
     * @return info objects for every readable note
     * @throws IOException if the root directory cannot be accessed or listed
     */
    @Override
    public List<NoteInfo> list(AuthenticationInfo subject) throws IOException {
        final List<NoteInfo> noteInfos = new LinkedList<>();
        DistributedFileSystemOps userFs = null;

        try {
            userFs = getUserDfs(subject);
            final Path root = getRootDir(userFs);

            for (FileStatus entry : userFs.listStatus(root)) {
                final Path entryPath = entry.getPath();
                final String entryName = entryPath.getName();

                // hidden or temporary files are never notes
                final boolean hiddenOrTemporary = entryName.startsWith(".") || entryName.startsWith("#")
                        || entryName.startsWith("~");
                // a note is stored as [NOTE_ID]/note.json, so only directories qualify
                if (hiddenOrTemporary || !userFs.getFilesystem().isDirectory(entryPath)) {
                    continue;
                }

                try {
                    final NoteInfo noteInfo = getNoteInfo(entryPath, userFs);
                    if (noteInfo != null) {
                        noteInfos.add(noteInfo);
                    }
                } catch (Exception e) {
                    logger.error("Can't read note " + entryPath.toString(), e);
                }
            }
        } finally {
            closeDfsClient(userFs);
        }

        return noteInfos;
    }

    /**
     * Loads a single note by id, reading it as the given subject.
     *
     * @param noteId  id of the note, which is also the name of its directory under the root
     * @param subject the requesting user
     * @return the loaded note
     * @throws IOException if the root or the note cannot be read
     */
    @Override
    public Note get(String noteId, AuthenticationInfo subject) throws IOException {
        DistributedFileSystemOps userFs = null;
        try {
            userFs = getUserDfs(subject);
            final Path noteDir = new Path(getRootDir(userFs), noteId);
            return getNote(noteDir, userFs);
        } finally {
            closeDfsClient(userFs);
        }
    }

    /**
     * Persists a note as pretty-printed JSON in {@code [root]/[noteId]/note.json}.
     * <p>
     * The note directory is created on first save. The JSON is first written to
     * {@code .note.json} in the same directory and then renamed over {@code note.json}.
     * If the note directory already exists, the write is done with a handle for the
     * directory's owner so the owner of the notebook does not change.
     *
     * @param note    the note to save
     * @param subject the requesting user
     * @throws IOException if the root is unusable, the note path is not a directory, or writing fails
     */
    @Override
    public synchronized void save(Note note, AuthenticationInfo subject) throws IOException {
        GsonBuilder gsonBuilder = new GsonBuilder();
        gsonBuilder.setPrettyPrinting();
        Gson gson = gsonBuilder.create();
        String json = gson.toJson(note);

        DistributedFileSystemOps udfso = null;
        DistributedFileSystemOps dfsOp = null;
        try {
            udfso = getUserDfs(subject);
            Path rootDir = getRootDir(udfso);

            Path noteDir = new Path(rootDir, note.getId());
            //returns dfs for the owner of the dir if the dir exists.
            //so we do not change the owner of the notebook.
            dfsOp = getDistributedFs(noteDir, subject, udfso);

            if (!udfso.getFilesystem().exists(noteDir)) {
                FsPermission fsPermission = new FsPermission(FsAction.ALL, FsAction.READ_EXECUTE, FsAction.NONE,
                        false);
                dfsOp.mkdir(noteDir, fsPermission);
            }
            if (!udfso.getFilesystem().isDirectory(noteDir)) {
                throw new IOException(noteDir.toString() + " is not a directory");
            }

            Path noteJson = new Path(noteDir, "note.json");
            Path noteJsonTemp = new Path(noteDir, ".note.json");

            // Encode before touching the filesystem so an encoding failure creates no temp file.
            byte[] payload = json.getBytes(conf.getString(ZeppelinConfiguration.ConfVars.ZEPPELIN_ENCODING));

            // try-with-resources: the stream is closed even when the write fails.
            try (OutputStream out = dfsOp.getFilesystem().create(noteJsonTemp)) {
                out.write(payload);
            }
            dfsOp.getFilesystem().rename(noteJsonTemp, noteJson, Options.Rename.OVERWRITE);

        } finally {
            if (null != udfso) {
                // dfsOp may be the same handle as udfso; close it separately only when it is not.
                if (null != dfsOp && !udfso.equals(dfsOp)) {
                    closeDfsClient(dfsOp);
                }
                closeDfsClient(udfso);
            }
        }
    }

    /**
     * Deletes the directory of the given note, recursively, using the superuser's handle.
     * <p>
     * When the owner of the notebook root matches the project-specific-user pattern and the
     * requesting user matches the project-generic-user pattern, the two extracted project
     * names must be equal, otherwise the removal is rejected. A note directory that does not
     * exist is silently ignored.
     *
     * @param noteId  id of the note to remove
     * @param subject the requesting user
     * @throws IOException if the requesting user is unknown or belongs to another project,
     *                     the note path is not a directory, or the filesystem operation fails
     */
    @Override
    public void remove(String noteId, AuthenticationInfo subject) throws IOException {
        DistributedFileSystemOps dfso = null;

        try {
            dfso = getDfs(superuser);
            Path rootDir = getRootDir(dfso);
            String hdfsOwner = dfso.getFileStatus(rootDir).getOwner();

            Matcher psuMatcher = psuPattern.matcher(hdfsOwner);
            if (psuMatcher.matches()) {
                // The project check needs a user name; fail with a declared exception instead
                // of the NullPointerException a missing subject used to cause here.
                String requester = subject == null ? null : subject.getUser();
                if (requester == null) {
                    throw new IOException("Can not remove note " + noteId
                            + ": no user available to check project membership");
                }
                String extractedPSUProjectname = psuMatcher.group(1);
                Matcher pguMatcher = pguPattern.matcher(requester);
                if (pguMatcher.matches()) {
                    String extractedPGUProjectname = pguMatcher.group(1);
                    if (!extractedPSUProjectname.equals(extractedPGUProjectname)) {
                        throw new IOException("User <" + requester + "> does not belong to project");
                    }
                }
            }

            Path noteDir = new Path(rootDir, noteId);

            if (!dfso.getFilesystem().exists(noteDir)) {
                // nothing to do
                return;
            }

            if (!dfso.getFilesystem().isDirectory(noteDir)) {
                // it does not look like zeppelin note savings
                throw new IOException("Can not remove " + noteDir.toString());
            }
            dfso.getFilesystem().delete(noteDir, true);
        } finally {
            if (dfso != null) {
                dfsService.closeDfsClient(dfso);
            }
        }
    }

    /**
     * Does nothing: this method releases no resources. The other methods of this class
     * close the filesystem handles they open themselves.
     */
    @Override
    public void close() {
        // no-op
    }

    /**
     * Checkpointing is not supported by this repository; the call only logs a warning.
     *
     * @param noteId        ignored
     * @param checkpointMsg ignored
     * @param subject       ignored
     * @return always {@code Revision.EMPTY}
     */
    @Override
    public Revision checkpoint(String noteId, String checkpointMsg, AuthenticationInfo subject) throws IOException {
        final String repoClass = this.getClass().toString();
        logger.warn("Checkpoint feature isn't supported in {}", repoClass);
        return Revision.EMPTY;
    }

    /**
     * Revision history is not supported by this repository; the call only logs a warning.
     *
     * @param noteId  ignored
     * @param subject ignored
     * @return always an empty list
     */
    @Override
    public List<Revision> revisionHistory(String noteId, AuthenticationInfo subject) {
        final String repoClass = this.getClass().toString();
        logger.warn("Get Note revisions feature isn't supported in {}", repoClass);
        return Collections.emptyList();
    }

    /**
     * Fetching a specific note revision is not supported by this repository; the call only
     * logs a warning.
     *
     * @param noteId  ignored
     * @param revId   ignored
     * @param subject ignored
     * @return always {@code null}
     */
    @Override
    public Note get(String noteId, String revId, AuthenticationInfo subject) throws IOException {
        final String repoClass = this.getClass().toString();
        logger.warn("Get note revision feature isn't supported in {}", repoClass);
        return null;
    }

    /**
     * Switching a note to a specific revision is not supported by this repository. Like the
     * other unsupported revision operations of this class, the call logs a warning.
     *
     * @param noteId  ignored
     * @param revId   ignored
     * @param subject ignored
     * @return always {@code null}
     */
    @Override
    public Note setNoteRevision(String noteId, String revId, AuthenticationInfo subject) throws IOException {
        logger.warn("Set note revision feature isn't supported in {}", this.getClass().toString());
        return null;
    }

    /**
     * Describes the single configurable setting of this repository: the notebook path.
     *
     * @param subject the requesting user (not used)
     * @return a one-element list holding an input-type setting named {@code "Notebook Path"}
     *         whose selected value is the current notebook directory path
     */
    @Override
    public List<NotebookRepoSettingsInfo> getSettings(AuthenticationInfo subject) {
        final NotebookRepoSettingsInfo pathSetting = NotebookRepoSettingsInfo.newInstance();
        pathSetting.name = "Notebook Path";
        pathSetting.type = NotebookRepoSettingsInfo.Type.INPUT;
        pathSetting.value = Collections.emptyList();
        pathSetting.selected = getNotebookDirPath();

        final List<NotebookRepoSettingsInfo> result = Lists.newArrayList();
        result.add(pathSetting);
        return result;
    }

    /**
     * Changes the notebook directory to the value stored under {@code "Notebook Path"} in
     * {@code settings}. Missing, empty or blank values are rejected with an error log entry;
     * a failure to switch directories is logged as well and never propagated.
     *
     * @param settings the new settings; only the {@code "Notebook Path"} key is read
     * @param subject  the requesting user, used only for the log message; may be {@code null}
     */
    @Override
    public void updateSettings(Map<String, String> settings, AuthenticationInfo subject) {
        if (settings == null || settings.isEmpty()) {
            logger.error("Cannot update {} with empty settings", this.getClass().getName());
            return;
        }

        // A missing key yields null, which isBlank treats the same as an empty value.
        String newNotebookDirectoryPath = settings.get("Notebook Path");
        if (StringUtils.isBlank(newNotebookDirectoryPath)) {
            logger.error("Notebook path is invalid");
            return;
        }

        // The subject is only needed for logging, so a missing one must not abort the update.
        String requester = subject == null ? "anonymous" : subject.getUser();
        logger.warn("{} will change notebook dir from {} to {}", requester, getNotebookDirPath(),
                newNotebookDirectoryPath);
        try {
            setNotebookDirectory(newNotebookDirectoryPath);
        } catch (IOException e) {
            logger.error("Cannot update notebook directory", e);
        }
    }

}