Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.zeppelin.python; import io.grpc.ManagedChannelBuilder; import org.apache.commons.exec.CommandLine; import org.apache.commons.exec.DefaultExecutor; import org.apache.commons.exec.ExecuteException; import org.apache.commons.exec.ExecuteResultHandler; import org.apache.commons.exec.ExecuteWatchdog; import org.apache.commons.exec.LogOutputStream; import org.apache.commons.exec.PumpStreamHandler; import org.apache.commons.exec.environment.EnvironmentUtils; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.exception.ExceptionUtils; import org.apache.zeppelin.interpreter.BaseZeppelinContext; import org.apache.zeppelin.interpreter.Interpreter; import org.apache.zeppelin.interpreter.InterpreterContext; import org.apache.zeppelin.interpreter.InterpreterException; import org.apache.zeppelin.interpreter.InterpreterResult; import org.apache.zeppelin.interpreter.remote.RemoteInterpreterUtils; import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion; import org.apache.zeppelin.interpreter.util.InterpreterOutputStream; import org.apache.zeppelin.python.proto.CancelRequest; import org.apache.zeppelin.python.proto.CompletionRequest; import org.apache.zeppelin.python.proto.CompletionResponse; import org.apache.zeppelin.python.proto.ExecuteRequest; import org.apache.zeppelin.python.proto.ExecuteResponse; import org.apache.zeppelin.python.proto.ExecuteStatus; import org.apache.zeppelin.python.proto.IPythonStatus; import org.apache.zeppelin.python.proto.StatusRequest; import org.apache.zeppelin.python.proto.StatusResponse; import org.apache.zeppelin.python.proto.StopRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import py4j.GatewayServer; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.net.URL; import java.nio.file.Files; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Properties; /** * IPython Interpreter for Zeppelin */ public class IPythonInterpreter extends Interpreter implements ExecuteResultHandler { private static final Logger LOGGER = LoggerFactory.getLogger(IPythonInterpreter.class); private ExecuteWatchdog watchDog; private IPythonClient ipythonClient; private GatewayServer gatewayServer; protected BaseZeppelinContext zeppelinContext; private String pythonExecutable; private long ipythonLaunchTimeout; private String additionalPythonPath; private String additionalPythonInitFile; private boolean useBuiltinPy4j = true; private boolean usePy4JAuth = true; private String secret; private InterpreterOutputStream interpreterOutput = new InterpreterOutputStream(LOGGER); public IPythonInterpreter(Properties properties) { super(properties); } /** * Sub class can customize the interpreter by adding more python packages under PYTHONPATH. * e.g. PySparkInterpreter * * @param additionalPythonPath */ public void setAdditionalPythonPath(String additionalPythonPath) { LOGGER.info("setAdditionalPythonPath: " + additionalPythonPath); this.additionalPythonPath = additionalPythonPath; } /** * Sub class can customize the interpreter by running additional python init code. * e.g. PySparkInterpreter * * @param additionalPythonInitFile */ public void setAdditionalPythonInitFile(String additionalPythonInitFile) { this.additionalPythonInitFile = additionalPythonInitFile; } public void setAddBulitinPy4j(boolean add) { this.useBuiltinPy4j = add; } public BaseZeppelinContext buildZeppelinContext() { return new PythonZeppelinContext(getInterpreterGroup().getInterpreterHookRegistry(), Integer.parseInt(getProperty("zeppelin.python.maxResult", "1000"))); } @Override public void open() throws InterpreterException { try { if (ipythonClient != null) { // IPythonInterpreter might already been opened by PythonInterpreter return; } pythonExecutable = getProperty("zeppelin.python", "python"); LOGGER.info("Python Exec: " + pythonExecutable); String checkPrerequisiteResult = checkIPythonPrerequisite(pythonExecutable); if (!StringUtils.isEmpty(checkPrerequisiteResult)) { throw new InterpreterException("IPython prerequisite is not meet: " + checkPrerequisiteResult); } ipythonLaunchTimeout = Long.parseLong(getProperty("zeppelin.ipython.launch.timeout", "30000")); this.zeppelinContext = buildZeppelinContext(); int ipythonPort = RemoteInterpreterUtils.findRandomAvailablePortOnAllLocalInterfaces(); int jvmGatewayPort = RemoteInterpreterUtils.findRandomAvailablePortOnAllLocalInterfaces(); int message_size = Integer .parseInt(getProperty("zeppelin.ipython.grpc.message_size", 32 * 1024 * 1024 + "")); ipythonClient = new IPythonClient(ManagedChannelBuilder.forAddress("127.0.0.1", ipythonPort) .usePlaintext(true).maxInboundMessageSize(message_size)); this.usePy4JAuth = Boolean.parseBoolean(getProperty("zeppelin.py4j.useAuth", "true")); this.secret = PythonUtils.createSecret(256); launchIPythonKernel(ipythonPort); setupJVMGateway(jvmGatewayPort); } catch (Exception e) { throw new RuntimeException("Fail to open IPythonInterpreter", e); } } /** * non-empty return value mean the errors when checking ipython prerequisite. * empty value mean IPython prerequisite is meet. * * @param pythonExec * @return */ public String checkIPythonPrerequisite(String pythonExec) { ProcessBuilder processBuilder = new ProcessBuilder(pythonExec, "-m", "pip", "freeze"); try { File stderrFile = File.createTempFile("zeppelin", ".txt"); processBuilder.redirectError(stderrFile); File stdoutFile = File.createTempFile("zeppelin", ".txt"); processBuilder.redirectOutput(stdoutFile); Process proc = processBuilder.start(); int ret = proc.waitFor(); if (ret != 0) { return "Fail to run pip freeze.\n" + IOUtils.toString(new FileInputStream(stderrFile)); } String freezeOutput = IOUtils.toString(new FileInputStream(stdoutFile)); if (!freezeOutput.contains("jupyter-client=")) { return "jupyter-client is not installed."; } if (!freezeOutput.contains("ipykernel=")) { return "ipykernel is not installed"; } if (!freezeOutput.contains("ipython=")) { return "ipython is not installed"; } if (!freezeOutput.contains("grpcio=")) { return "grpcio is not installed"; } if (!freezeOutput.contains("protobuf=")) { return "protobuf is not installed"; } LOGGER.info("IPython prerequisite is met"); } catch (Exception e) { LOGGER.warn("Fail to checkIPythonPrerequisite", e); return "Fail to checkIPythonPrerequisite: " + ExceptionUtils.getStackTrace(e); } return ""; } private void setupJVMGateway(int jvmGatewayPort) throws IOException { String serverAddress = PythonUtils.getLocalIP(properties); this.gatewayServer = PythonUtils.createGatewayServer(this, serverAddress, jvmGatewayPort, secret, usePy4JAuth); gatewayServer.start(); InputStream input = getClass().getClassLoader().getResourceAsStream("grpc/python/zeppelin_python.py"); List<String> lines = IOUtils.readLines(input); ExecuteResponse response = ipythonClient.block_execute(ExecuteRequest.newBuilder() .setCode(StringUtils.join(lines, System.lineSeparator()) .replace("${JVM_GATEWAY_PORT}", jvmGatewayPort + "") .replace("${JVM_GATEWAY_ADDRESS}", serverAddress)) .build()); if (response.getStatus() == ExecuteStatus.ERROR) { throw new IOException("Fail to setup JVMGateway\n" + response.getOutput()); } input = getClass().getClassLoader().getResourceAsStream("python/zeppelin_context.py"); lines = IOUtils.readLines(input); response = ipythonClient.block_execute( ExecuteRequest.newBuilder().setCode(StringUtils.join(lines, System.lineSeparator())).build()); if (response.getStatus() == ExecuteStatus.ERROR) { throw new IOException("Fail to import ZeppelinContext\n" + response.getOutput()); } response = ipythonClient.block_execute(ExecuteRequest.newBuilder() .setCode("z = __zeppelin__ = PyZeppelinContext(intp.getZeppelinContext(), gateway)").build()); if (response.getStatus() == ExecuteStatus.ERROR) { throw new IOException("Fail to setup ZeppelinContext\n" + response.getOutput()); } if (additionalPythonInitFile != null) { input = getClass().getClassLoader().getResourceAsStream(additionalPythonInitFile); lines = IOUtils.readLines(input); response = ipythonClient.block_execute(ExecuteRequest.newBuilder() .setCode(StringUtils.join(lines, System.lineSeparator()) .replace("${JVM_GATEWAY_PORT}", jvmGatewayPort + "") .replace("${JVM_GATEWAY_ADDRESS}", serverAddress)) .build()); if (response.getStatus() == ExecuteStatus.ERROR) { throw new IOException("Fail to run additional Python init file: " + additionalPythonInitFile + "\n" + response.getOutput()); } } } private void launchIPythonKernel(int ipythonPort) throws IOException { LOGGER.info("Launching IPython Kernel at port: " + ipythonPort); // copy the python scripts to a temp directory, then launch ipython kernel in that folder File pythonWorkDir = Files.createTempDirectory("zeppelin_ipython").toFile(); String[] ipythonScripts = { "ipython_server.py", "ipython_pb2.py", "ipython_pb2_grpc.py" }; for (String ipythonScript : ipythonScripts) { URL url = getClass().getClassLoader().getResource("grpc/python" + "/" + ipythonScript); FileUtils.copyURLToFile(url, new File(pythonWorkDir, ipythonScript)); } CommandLine cmd = CommandLine.parse(pythonExecutable); cmd.addArgument(pythonWorkDir.getAbsolutePath() + "/ipython_server.py"); cmd.addArgument(ipythonPort + ""); DefaultExecutor executor = new DefaultExecutor(); ProcessLogOutputStream processOutput = new ProcessLogOutputStream(LOGGER); executor.setStreamHandler(new PumpStreamHandler(processOutput)); watchDog = new ExecuteWatchdog(ExecuteWatchdog.INFINITE_TIMEOUT); executor.setWatchdog(watchDog); if (useBuiltinPy4j) { //TODO(zjffdu) don't do hard code on py4j here File py4jDestFile = new File(pythonWorkDir, "py4j-src-0.10.7.zip"); FileUtils.copyURLToFile(getClass().getClassLoader().getResource("python/py4j-src-0.10.7.zip"), py4jDestFile); if (additionalPythonPath != null) { // put the py4j at the end, because additionalPythonPath may already contain py4j. // e.g. PySparkInterpreter additionalPythonPath = additionalPythonPath + ":" + py4jDestFile.getAbsolutePath(); } else { additionalPythonPath = py4jDestFile.getAbsolutePath(); } } Map<String, String> envs = setupIPythonEnv(); executor.execute(cmd, envs, this); // wait until IPython kernel is started or timeout long startTime = System.currentTimeMillis(); while (true) { try { Thread.sleep(100); } catch (InterruptedException e) { LOGGER.error("Interrupted by something", e); } try { StatusResponse response = ipythonClient.status(StatusRequest.newBuilder().build()); if (response.getStatus() == IPythonStatus.RUNNING) { LOGGER.info("IPython Kernel is Running"); break; } else { LOGGER.info("Wait for IPython Kernel to be started"); } } catch (Exception e) { // ignore the exception, because is may happen when grpc server has not started yet. LOGGER.info("Wait for IPython Kernel to be started"); } if ((System.currentTimeMillis() - startTime) > ipythonLaunchTimeout) { throw new IOException( "Fail to launch IPython Kernel in " + ipythonLaunchTimeout / 1000 + " seconds"); } } } protected Map<String, String> setupIPythonEnv() throws IOException { Map<String, String> envs = EnvironmentUtils.getProcEnvironment(); if (envs.containsKey("PYTHONPATH")) { if (additionalPythonPath != null) { envs.put("PYTHONPATH", additionalPythonPath + ":" + envs.get("PYTHONPATH")); } } else { envs.put("PYTHONPATH", additionalPythonPath); } if (usePy4JAuth) { envs.put("PY4J_GATEWAY_SECRET", secret); } LOGGER.info("PYTHONPATH:" + envs.get("PYTHONPATH")); return envs; } @Override public void close() throws InterpreterException { if (watchDog != null) { LOGGER.info("Kill IPython Process"); ipythonClient.stop(StopRequest.newBuilder().build()); watchDog.destroyProcess(); gatewayServer.shutdown(); } } @Override public InterpreterResult interpret(String st, InterpreterContext context) { zeppelinContext.setGui(context.getGui()); zeppelinContext.setNoteGui(context.getNoteGui()); zeppelinContext.setInterpreterContext(context); interpreterOutput.setInterpreterOutput(context.out); ExecuteResponse response = ipythonClient.stream_execute(ExecuteRequest.newBuilder().setCode(st).build(), interpreterOutput); try { interpreterOutput.getInterpreterOutput().flush(); } catch (IOException e) { throw new RuntimeException("Fail to write output", e); } InterpreterResult result = new InterpreterResult( InterpreterResult.Code.valueOf(response.getStatus().name())); return result; } @Override public void cancel(InterpreterContext context) throws InterpreterException { ipythonClient.cancel(CancelRequest.newBuilder().build()); } @Override public FormType getFormType() { return FormType.SIMPLE; } @Override public int getProgress(InterpreterContext context) throws InterpreterException { return 0; } @Override public List<InterpreterCompletion> completion(String buf, int cursor, InterpreterContext interpreterContext) { LOGGER.debug("Call completion for: " + buf); List<InterpreterCompletion> completions = new ArrayList<>(); CompletionResponse response = ipythonClient.complete( CompletionRequest.getDefaultInstance().newBuilder().setCode(buf).setCursor(cursor).build()); for (int i = 0; i < response.getMatchesCount(); i++) { String match = response.getMatches(i); int lastIndexOfDot = match.lastIndexOf("."); if (lastIndexOfDot != -1) { match = match.substring(lastIndexOfDot + 1); } completions.add(new InterpreterCompletion(match, match, "")); } return completions; } public BaseZeppelinContext getZeppelinContext() { return zeppelinContext; } @Override public void onProcessComplete(int exitValue) { LOGGER.warn("Python Process is completed with exitValue: " + exitValue); } @Override public void onProcessFailed(ExecuteException e) { LOGGER.warn("Exception happens in Python Process", e); } static class ProcessLogOutputStream extends LogOutputStream { private Logger logger; ProcessLogOutputStream(Logger logger) { this.logger = logger; } @Override protected void processLine(String s, int i) { this.logger.debug("Process Output: " + s); } } }