Java tutorial
ScriptUtils.java (com.ikanow.aleph2.harvest.script.utils): utility methods used by the Aleph2 script harvester to copy, launch, stop and monitor harvest scripts.
/*******************************************************************************
 * Copyright 2016, The IKANOW Open Source Project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/
package com.ikanow.aleph2.harvest.script.utils;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import scala.Tuple2;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.google.common.base.Charsets;
import com.ikanow.aleph2.data_model.interfaces.data_import.IHarvestContext;
import com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean;
import com.ikanow.aleph2.data_model.objects.shared.BasicMessageBean;
import com.ikanow.aleph2.data_model.utils.BeanTemplateUtils;
import com.ikanow.aleph2.data_model.utils.BucketUtils;
import com.ikanow.aleph2.data_model.utils.ErrorUtils;
import com.ikanow.aleph2.data_model.utils.ProcessUtils;
import com.ikanow.aleph2.harvest.script.data_model.ScriptHarvesterBucketConfigBean;
import com.ikanow.aleph2.harvest.script.services.ScriptHarvestService;

public class ScriptUtils {
    private static final Logger _logger = LogManager.getLogger();
    private static final String TMP_SCRIPT_FILE_PREFIX = "tmp_script_";
    private static final String ENV_TEST_NUM_OBJ = "A2_TEST_NUM_OBJECTS";          //number of test objects requested (onTest only)
    private static final String ENV_TEST_MAX_RUNTIME_S = "A2_TEST_MAX_RUNTIME_S";  //max seconds runtime (onTest only)
    private static final String ENV_LIBRARY_PATH = "A2_LIBRARY_PATH";              //path of cached library jar
    private static final String ENV_MODULE_PATH = "A2_MODULE_PATH";                //path of cached module jars
    private static final String ENV_CLASS_PATH = "A2_CLASS_PATH";                  //path of lib + module jars
    private static final String ENV_BUCKET_HDFS_PATH = "A2_BUCKET_HDFS_PATH";      //path of bucket in hdfs
    private static final String ENV_BUCKET_PATH = "A2_BUCKET_PATH";                //subpath to bucket
    private static final String ENV_BUCKET_SIGNATURE = "A2_BUCKET_SIGNATURE";      //unique signature of the bucket
    private static final String ENV_BUCKET_STR = "A2_BUCKET_STR";                  //string of bucket json
    private static final String LOCAL_RUN_DIR_SUFFIX = "run" + File.separator;

    /**
     * Copies the given script into a temporary output file under java.io.tmpdir.
     * Uses the bucket's full name and owner_id to create a unique path.
     *
     * @param script
     * @param bucket
     */
    public static String saveScriptToTempFile(final String script, final DataBucketBean bucket) throws IOException {
        final File script_output_file = new File(createTmpScriptFilePath(bucket));
        FileUtils.writeStringToFile(script_output_file, script);
        return script_output_file.getAbsolutePath();
    }

    /**
     * Creates a ProcessBuilder pointed at the given script path and adds the working dir and environment vars for you.
     * Just runs a process that does "sh <script_file_path>"
     * @param script_file_path
     * @param working_dir
     * @return
     * @throws JsonProcessingException
     * @throws ExecutionException
     * @throws InterruptedException
     */
    public static ProcessBuilder createProcessBuilderForScriptFile(final String script_file_path,
            final String working_dir, final Optional<Long> test_requested_num_objects,
            final Optional<Long> test_max_runtime_s, final Map<String, String> user_args,
            final IHarvestContext context, final DataBucketBean bucket, final String aleph_global_root_path)
            throws JsonProcessingException, InterruptedException, ExecutionException {
        _logger.debug("create pb for script file: " + script_file_path);

        ArrayList<String> args = new ArrayList<String>();
        args.add("sh");
        args.add(script_file_path);
        final ProcessBuilder pb = new ProcessBuilder(args);
        pb.directory(new File(working_dir)).redirectErrorStream(true);
        pb.environment().put("JAVA_OPTS", "");
        if (test_requested_num_objects.isPresent())
            pb.environment().put(ENV_TEST_NUM_OBJ, test_requested_num_objects.get().toString());
        if (test_max_runtime_s.isPresent())
            pb.environment().put(ENV_TEST_MAX_RUNTIME_S, test_max_runtime_s.get().toString());

        //add in default env vars
        final String classpath = Stream
                .concat(context.getHarvestContextLibraries(Optional.empty()).stream(),
                        context.getHarvestLibraries(Optional.of(bucket)).get().values().stream())
                .collect(Collectors.joining(":"));
        pb.environment().put(ENV_MODULE_PATH,
                context.getHarvestContextLibraries(Optional.empty()).stream().collect(Collectors.joining(":")));
        pb.environment().put(ENV_LIBRARY_PATH, context.getHarvestLibraries(Optional.of(bucket)).get().values()
                .stream().collect(Collectors.joining(":")));
        pb.environment().put(ENV_CLASS_PATH, classpath);
        pb.environment().put(ENV_BUCKET_HDFS_PATH, aleph_global_root_path + "/data" + bucket.full_name());
        pb.environment().put(ENV_BUCKET_SIGNATURE, BucketUtils.getUniqueSignature(bucket.full_name(), Optional.empty()));
        pb.environment().put(ENV_BUCKET_PATH, bucket.full_name());
        pb.environment().put(ENV_BUCKET_STR, BeanTemplateUtils.toJson(bucket).toString());

        //add user args as env vars
        user_args.forEach((k, val) -> pb.environment().put(k, val));

        return pb;
    }

    /**
     * Creates a file path for a temporary local file that should be unique for this user/bucket;
     * scripts can be copied into this location.
     *
     * @param bucket
     * @return
     */
    private static String createTmpScriptFilePath(DataBucketBean bucket) {
        return new StringBuilder().append(System.getProperty("java.io.tmpdir")).append(File.separator)
                .append(TMP_SCRIPT_FILE_PREFIX)
                .append(BucketUtils.getUniqueSignature(bucket.full_name(), Optional.empty())).append("_")
                .append(bucket.owner_id()).append(".sh").toString();
    }
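    /* ----------------------------------------------------------------------------------------------
     * Illustrative sketch (not part of the original class): how the helpers above are typically
     * combined - save a script, build the "sh <file>" ProcessBuilder, then hand it to
     * ProcessUtils.launchProcess, which is exactly what startScriptProcess() below does. The bucket
     * and context are assumed to come from the harvester; the working dir "/tmp/work/" and root path
     * "/app/aleph2" are assumptions for the example.
     *
     *   final String script_path = ScriptUtils.saveScriptToTempFile("#!/bin/sh\necho hello", bucket);
     *   final ProcessBuilder pb = ScriptUtils.createProcessBuilderForScriptFile(script_path, "/tmp/work/",
     *           Optional.empty(), Optional.empty(), new HashMap<String, String>(), context, bucket, "/app/aleph2");
     *   final Tuple2<String, String> err_pid = ProcessUtils.launchProcess(pb,
     *           ScriptHarvestService.class.getSimpleName(), bucket, "/app/aleph2/run/", Optional.empty());
     *   // err_pid._1() is an error message on failure, err_pid._2() the launched pid on success
     * ---------------------------------------------------------------------------------------------- */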
    /**
     * Starts a script process and stores the PID somewhere so we can kill it later if need be.
     *
     * @param bucket
     */
    public static BasicMessageBean startScriptProcess(final DataBucketBean bucket, final IHarvestContext context,
            final String aleph_local_root_path, final String aleph_global_root_path,
            final ScriptHarvesterBucketConfigBean config, final String message, final String working_dir,
            Optional<Long> requested_num_objects, Optional<Long> max_run_time_secs) {
        //TODO pass on user args in config.args to script call
        //TODO start script, record pid so we can kill later if need be
        //validate one of the 3 script fields was supplied
        if (!config.script().isEmpty() || !config.local_script_url().isEmpty() || !config.resource_name().isEmpty()) {
            _logger.debug("Running a script or script_file: " + config.script() + " or local: "
                    + config.local_script_url() + " or resource: " + config.resource_name());

            String script_file_path;
            //get the script file path from the 3 config options
            if (!config.script().isEmpty()) {
                //SCRIPT FIELD - copy to local file
                try {
                    script_file_path = ScriptUtils.saveScriptToTempFile(config.script(), bucket);
                } catch (IOException e) {
                    return ErrorUtils.buildErrorMessage(ScriptHarvestService.class.getSimpleName(), message,
                            "Could not create temporary file to load script into: " + e.getMessage());
                }
            } else if (!config.local_script_url().isEmpty()) {
                //SCRIPT FILE - point to it
                script_file_path = config.local_script_url();
            } else {
                //RESOURCE - copy to local file, point to it
                try {
                    final String resource = IOUtils
                            .toString(ScriptHarvestService.class.getClassLoader()
                                    .getResourceAsStream(config.resource_name()), Charsets.UTF_8)
                            .replaceAll("\r\n", "\n");
                    script_file_path = ScriptUtils.saveScriptToTempFile(resource, bucket);
                } catch (IOException e) {
                    return ErrorUtils.buildErrorMessage(ScriptHarvestService.class.getSimpleName(), message,
                            "Could not create temporary file to load script into: " + e.getMessage());
                } catch (NullPointerException e) {
                    return ErrorUtils.buildErrorMessage(ScriptHarvestService.class.getSimpleName(), message,
                            "Could not find resource file: " + config.resource_name());
                }
            }

            //run the script file (or script we copied into one)
            ProcessBuilder pb;
            try {
                pb = ScriptUtils.createProcessBuilderForScriptFile(script_file_path, working_dir,
                        requested_num_objects, max_run_time_secs, config.args(), context, bucket,
                        aleph_global_root_path);
            } catch (JsonProcessingException | InterruptedException | ExecutionException e) {
                return ErrorUtils.buildErrorMessage(ScriptHarvestService.class.getSimpleName(), message,
                        "Could not create process to run: " + e.getMessage());
            }

            final Tuple2<String, String> err_pid = ProcessUtils.launchProcess(pb,
                    ScriptHarvestService.class.getSimpleName(), bucket,
                    aleph_local_root_path + LOCAL_RUN_DIR_SUFFIX,
                    max_run_time_secs.isPresent()
                            ? Optional.of(new Tuple2<Long, Integer>(max_run_time_secs.get(), 9))
                            : Optional.empty());

            if (null != err_pid._1()) {
                return ErrorUtils.buildErrorMessage(ScriptHarvestService.class.getSimpleName(), message,
                        "Bucket error: " + err_pid._1());
            } else {
                return ErrorUtils.buildSuccessMessage(ScriptHarvestService.class.getSimpleName(), message,
                        "Bucket launched: " + err_pid._2());
            }
        } else {
            return ErrorUtils.buildErrorMessage(ScriptHarvestService.class.getSimpleName(), message,
                    "requires script, local_script_url or resource_name to be specified in the harvester config");
        }
    }
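    /* ----------------------------------------------------------------------------------------------
     * Illustrative sketch (not part of the original class): a test-style call to startScriptProcess().
     * The root paths, working dir and the "onTestSource" message string are assumptions for the
     * example; the two Optional values are surfaced to the launched script as the A2_TEST_NUM_OBJECTS
     * and A2_TEST_MAX_RUNTIME_S environment variables set above, and the max runtime is also used to
     * kill the process.
     *
     *   final BasicMessageBean result = ScriptUtils.startScriptProcess(bucket, context,
     *           "/app/aleph2/", "/app/aleph2", config, "onTestSource", "/tmp/work/",
     *           Optional.of(10L),    // requested number of test objects
     *           Optional.of(60L));   // max runtime in seconds
     *   if (!result.success()) _logger.error(result.message());
     * ---------------------------------------------------------------------------------------------- */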
    /**
     * Gets the pid if one exists for this bucket and tries to kill it.
     * (Note: if the job has already finished, the pid may since have been re-assigned to another process.)
     *
     * @param bucket
     */
    public static BasicMessageBean stopScriptProcess(final DataBucketBean bucket,
            final ScriptHarvesterBucketConfigBean config, final String message, final String working_dir,
            final String aleph_root_path) {
        //STOP PID if it's still running (verify it's the same process)
        final Tuple2<String, Boolean> err_pid = ProcessUtils.stopProcess(ScriptHarvestService.class.getSimpleName(),
                bucket, aleph_root_path + LOCAL_RUN_DIR_SUFFIX, Optional.empty());
        if (!err_pid._2) {
            //failed to stop, try to cleanup script file and bail out
            cleanupTempScriptFile(bucket, message);
            return ErrorUtils.buildErrorMessage(ScriptHarvestService.class.getSimpleName(), message,
                    "Error stopping script (the script may still be running on the server and may need to be killed manually): "
                            + err_pid._1, Optional.empty());
        }

        //KILL tmp script file we copied locally if need be
        cleanupTempScriptFile(bucket, message);

        return ErrorUtils.buildSuccessMessage(ScriptHarvestService.class.getSimpleName(), message,
                "Temporary script stopped and tmp files deleted successfully.");
    }

    private static BasicMessageBean cleanupTempScriptFile(final DataBucketBean bucket, final String message) {
        final String script_file_path = ScriptUtils.createTmpScriptFilePath(bucket);
        final File tmp_script_file = new File(script_file_path);
        if (tmp_script_file.exists())
            tmp_script_file.delete();
        return ErrorUtils.buildSuccessMessage(ScriptHarvestService.class.getSimpleName(), message,
                "Temporary script deleted successfully.");
    }

    /**
     * Just a helper function that calls stopScriptProcess then startScriptProcess.
     * @param bucket
     */
    public static BasicMessageBean restartScriptProcess(final DataBucketBean bucket, final IHarvestContext context,
            final String aleph_local_root_path, final String aleph_global_root_path,
            final ScriptHarvesterBucketConfigBean config, final String message, final String working_dir,
            Optional<Long> requested_num_objects, Optional<Long> max_run_time_secs) {
        final BasicMessageBean stop_result = stopScriptProcess(bucket, config, message, working_dir,
                aleph_local_root_path);
        final BasicMessageBean start_result = startScriptProcess(bucket, context, aleph_local_root_path,
                aleph_global_root_path, config, message, working_dir, requested_num_objects, max_run_time_secs);

        //merge the results and return that bean
        final Map<String, Object> details = new HashMap<String, Object>();
        if (stop_result.details() != null)
            details.putAll(stop_result.details());
        if (start_result.details() != null)
            details.putAll(start_result.details());
        return new BasicMessageBean(new Date(), stop_result.success() && start_result.success(),
                ScriptHarvestService.class.getSimpleName(), "restartScriptProcess", 0,
                "STOP: " + stop_result.message() + " START: " + start_result.message(), details);
    }

    public static boolean isProcessRunning(final DataBucketBean bucket, final String aleph_root_path) {
        return ProcessUtils.isProcessRunning(ScriptHarvestService.class.getSimpleName(), bucket,
                aleph_root_path + LOCAL_RUN_DIR_SUFFIX);
    }
}
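
Below is a minimal usage sketch (not part of the ScriptUtils library itself) showing how a caller might drive the start / check / stop lifecycle exposed by the class above. It assumes the Aleph2 framework supplies the bucket, harvest context and bucket config; the root paths, working directory, class name and the "onNewSource" / "onDelete" message strings are illustrative placeholders rather than values defined by ScriptUtils.

import java.util.Optional;

import com.ikanow.aleph2.data_model.interfaces.data_import.IHarvestContext;
import com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean;
import com.ikanow.aleph2.data_model.objects.shared.BasicMessageBean;
import com.ikanow.aleph2.harvest.script.data_model.ScriptHarvesterBucketConfigBean;
import com.ikanow.aleph2.harvest.script.utils.ScriptUtils;

/** Illustrative driver only - a sketch, not part of the original library. */
public class ExampleScriptHarvesterDriver {

    public static void runOnce(final DataBucketBean bucket, final IHarvestContext context,
            final ScriptHarvesterBucketConfigBean config) {
        final String local_root = "/app/aleph2/";    // assumed local root; the process is tracked under <root>/run/
        final String global_root = "/app/aleph2";    // assumed global root used for A2_BUCKET_HDFS_PATH

        // launch whichever of script / local_script_url / resource_name is configured
        final BasicMessageBean start = ScriptUtils.startScriptProcess(bucket, context, local_root, global_root,
                config, "onNewSource", "/tmp/work/", Optional.empty(), Optional.empty());
        System.out.println("start: " + start.success() + " / " + start.message());

        // the pid recorded at launch time lets us check on the script and stop it later
        if (ScriptUtils.isProcessRunning(bucket, local_root)) {
            final BasicMessageBean stop = ScriptUtils.stopScriptProcess(bucket, config, "onDelete",
                    "/tmp/work/", local_root);
            System.out.println("stop: " + stop.success() + " / " + stop.message());
        }
    }
}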