// Java tutorial
/**
 * Copyright (C) 2001-2017 by RapidMiner and the contributors
 *
 * Complete list of developers available at our web site:
 *
 * http://rapidminer.com
 *
 * This program is free software: you can redistribute it and/or modify it under the terms of the
 * GNU Affero General Public License as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License along with this program.
 * If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator;

import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang.exception.ExceptionUtils;
import org.codehaus.groovy.GroovyBugError;

import com.rapidminer.operator.ProcessSetupError.Severity;
import com.rapidminer.operator.ports.InputPortExtender;
import com.rapidminer.operator.ports.OutputPortExtender;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeText;
import com.rapidminer.parameter.TextType;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.plugin.Plugin;

import groovy.lang.Binding;
import groovy.lang.GroovyCodeSource;
import groovy.lang.GroovyShell;
import groovy.lang.Script;


/**
 * <p>
 * This operator can be used to execute arbitrary Groovy scripts. This basically means that analysts
 * can write their own operators directly within the process by specifying Java code and / or a
 * Groovy script which will be interpreted and executed during process runtime. For a complete
 * reference of Groovy scripting please refer to http://groovy.codehaus.org/.
 * </p>
 *
 * <p>
 * In addition to the usual scripting code elements from Groovy, the RapidMiner scripting operator
 * defines some special scripting elements:
 * </p>
 * <ul>
 * <li>If you use the standard <em>imports</em>, all important types like Example, ExampleSet,
 * Attribute, Operator etc. as well as the most important Java types like collections etc. are
 * automatically imported and can directly be used within the script. Hence, there is no need for
 * importing them in your script. However, you can of course import any other class you want and use
 * this in your script.</li>
 * <li>The <em>current operator</em> (the scripting operator for which you define the script) is
 * referenced by <code>operator</code>.<br />
 * Example: <code>operator.log("text")</code></li>
 * <li>All <em>operator methods</em> like <code>log</code> (see above), accessing the input or the
 * complete process can directly be used by writing a preceding <code>operator</code>.<br />
 * Example: <code>operator.getProcess()</code></li>
 * <li><em>Input of the operator</em> can be retrieved via the input method getInput(Class) of the
 * surrounding operator.<br />
 * Example: <code>ExampleSet exampleSet = operator.getInput(ExampleSet.class)</code></li>
 * <li>You can <em>iterate over examples</em> with the following construct:<br />
 * <code>for (Example example : exampleSet) { ... }</code></li>
 * <li>You can <em>retrieve example values</em> with the shortcut<br />
 * <code>String value = example["attribute_name"];</code> or <br />
 * <code>double value = example["attribute_name"];</code></li>
 * <li>You can <em>set example values</em> with
 * <code>example["attribute_name"] = "value";</code> or <br />
 * <code>example["attribute_name"] = 5.7;</code></li>
 * </ul>
 *
 * <p>
 * <em>Note:</em> Scripts written for this operator may access Java code. Scripts may hence become
 * incompatible in future releases of RapidMiner.
 * </p>
 *
 * <p>
 * <em>Note:</em> As of RapidMiner Studio 7.5, Execute Script is now capable of executing many
 * scripts in parallel by implementing script caching. Before that, each execution parsed its script
 * again which was done on a global lock.
 * </p>
 *
 * @author Simon Fischer, Ingo Mierswa, Marco Boeck
 */
public class ScriptingOperator extends Operator {

	/**
	 * Binding delegator capable of multi-threaded access. Using a regular binding on a script which
	 * is run concurrently would result in the last set binding to be used.
	 * <p>
	 * Every accessor forwards to a {@link Binding} held in a {@link ThreadLocal}, so each thread
	 * reads and writes its own set of variables.
	 * </p>
	 *
	 * @author Marco Boeck
	 * @since 7.5
	 */
	private static class ConcurrentBindingDelegator extends Binding {

		/** the per-thread binding; created lazily on first access from a thread */
		private final ThreadLocal<Binding> binding = new ThreadLocal<Binding>() {

			@Override
			protected Binding initialValue() {
				return new Binding();
			}
		};

		@Override
		public Object getVariable(String name) {
			return binding.get().getVariable(name);
		}

		@Override
		public void setVariable(String name, Object value) {
			binding.get().setVariable(name, value);
		}

		@Override
		public boolean hasVariable(String name) {
			return binding.get().hasVariable(name);
		}

		@Override
		@SuppressWarnings("rawtypes")
		public Map getVariables() {
			return binding.get().getVariables();
		}

		@Override
		public Object getProperty(String property) {
			return binding.get().getProperty(property);
		}

		@Override
		public void setProperty(String property, Object newValue) {
			binding.get().setProperty(property, newValue);
		}
	}

	private final InputPortExtender inExtender = new InputPortExtender("input", getInputPorts());
	private final OutputPortExtender outExtender = new OutputPortExtender("output", getOutputPorts());

	public static final String PARAMETER_SCRIPT = "script";

	public static final String GROOVY_DOMAIN = "/groovyscript";

	public static final String PARAMETER_STANDARD_IMPORTS = "standard_imports";

	/** the max number of entries in the script cache */
	private static final int MAX_CACHE_SIZE = 500;

	/** the import statements which are prepended to the script if standard imports are enabled */
	private static final String STANDARD_IMPORTS = "import com.rapidminer.example.*;\n"
			+ "import com.rapidminer.example.set.*;\n"
			+ "import com.rapidminer.example.table.*;\n"
			+ "import com.rapidminer.operator.*;\n"
			+ "import com.rapidminer.tools.Tools;\n"
			+ "import java.util.*;\n";

	/**
	 * this map contains lock objects for each script. This is necessary because we only want to
	 * block a particular script which has not yet been parsed, but not other scripts. If there was
	 * only synchronization on a static object, all script execution would be blocked JVM-wide
	 * (think background process execution or RM Server) until parsing is finished.
	 */
	private static final Map<String, Object> LOCK_MAP = Collections
			.synchronizedMap(new LinkedHashMap<String, Object>(MAX_CACHE_SIZE + 1, 0.75f, true) {

				private static final long serialVersionUID = 1L;

				@Override
				public boolean removeEldestEntry(Map.Entry<String, Object> eldest) {
					return size() > MAX_CACHE_SIZE;
				}
			});

	/**
	 * this map contains the parsed scripts. This greatly speeds up operator execution, especially
	 * in loops. Will drop the oldest scripts that have not been used if the max cache size is
	 * exceeded.
	 */
	private static final Map<String, Script> SCRIPT_CACHE = Collections
			.synchronizedMap(new LinkedHashMap<String, Script>(MAX_CACHE_SIZE + 1, 0.75f, true) {

				private static final long serialVersionUID = 1L;

				@Override
				public boolean removeEldestEntry(Map.Entry<String, Script> eldest) {
					return size() > MAX_CACHE_SIZE;
				}
			});

	/**
	 * Creates the operator and starts the input and output port extenders.
	 *
	 * @param description
	 *            the operator description which is passed on to the super class
	 */
	public ScriptingOperator(OperatorDescription description) {
		super(description);
		inExtender.start();
		outExtender.start();
	}

	/**
	 * Adds a setup warning ("script_has_macro") if the raw script text contains a macro start
	 * marker which is followed by a macro end marker.
	 */
	@Override
	protected void performAdditionalChecks() {
		super.performAdditionalChecks();
		try {
			String scriptWithoutReplacedMacros = getParameters().getParameter(PARAMETER_SCRIPT);
			int startIndex = scriptWithoutReplacedMacros.indexOf(Operator.MACRO_STRING_START);
			int endIndex = scriptWithoutReplacedMacros.indexOf(Operator.MACRO_STRING_END, startIndex);
			if (startIndex > -1 && endIndex > -1) {
				addError(new SimpleProcessSetupError(Severity.WARNING, getPortOwner(), "script_has_macro"));
			}
		} catch (UndefinedParameterError e) {
			// should not happen - ignore
		}
	}

	/**
	 * Runs the configured script with the variables <code>input</code> (the list of input objects)
	 * and <code>operator</code> (this operator) and delivers whatever the script returns to the
	 * output ports.
	 *
	 * @throws OperatorException
	 *             a {@link UserError} if parsing or running the script fails or is rejected for
	 *             security reasons
	 */
	@Override
	public void doWork() throws OperatorException {
		String script = getParameterAsString(PARAMETER_SCRIPT);
		if (getParameterAsBoolean(PARAMETER_STANDARD_IMPORTS)) {
			script = STANDARD_IMPORTS + script;
		}

		List<IOObject> input = inExtender.getData(IOObject.class, false);

		Object result;
		try {
			// parsing stays inside the try block so that compilation problems are reported as
			// user errors as well
			Script cachedScript = getOrParseScript(script);

			// the binding of a cached script is the thread-aware delegator, so these variables
			// are only visible to the thread which executes this operator
			cachedScript.getBinding().setVariable("input", input);
			cachedScript.getBinding().setVariable("operator", this);

			// run the script via the delegator
			result = cachedScript.run();
		} catch (SecurityException e) {
			throw new UserError(this, e, "scriptingOperator_security", e.getMessage());
		} catch (GroovyBugError e) {
			if (e.getCause() instanceof SecurityException) {
				throw new UserError(this, e.getCause(), "scriptingOperator_security", e.getCause().getMessage());
			} else {
				throw new UserError(this, e, 945, "Groovy", e);
			}
		} catch (Throwable e) {
			throw new UserError(this, e, 945, "Groovy", e, ExceptionUtils.getStackTrace(e));
		}

		deliverResult(result);
	}

	/**
	 * Looks up the parsed form of the given script text in the cache and parses it if it is not
	 * there yet.
	 *
	 * @param script
	 *            the complete script text, which also serves as the cache key
	 * @return the cached or freshly parsed script
	 */
	private Script getOrParseScript(String script) {
		// cache access is synchronized on a per-script basis to prevent Execute Script
		// inside a loop to start many parsings at the same time
		Object lock;
		synchronized (LOCK_MAP) {
			lock = LOCK_MAP.get(script);
			if (lock == null) {
				lock = new Object();
				LOCK_MAP.put(script, lock);
			}
		}

		synchronized (lock) {
			Script cachedScript = SCRIPT_CACHE.get(script);
			if (cachedScript == null) {
				// use the delegator which is capable of handling multi-threaded access as
				// binding
				GroovyShell shell = new GroovyShell(Plugin.getMajorClassLoader(), new ConcurrentBindingDelegator());
				GroovyCodeSource codeSource = new GroovyCodeSource(script, "customScript", GROOVY_DOMAIN);
				codeSource.setCachable(false);
				cachedScript = shell.parse(codeSource);
				SCRIPT_CACHE.put(script, cachedScript);
			}
			return cachedScript;
		}
	}

	/**
	 * Delivers the value returned by the script to the output ports. Arrays and lists are delivered
	 * element-wise, a single {@link IOObject} is delivered as a one-element list, <code>null</code>
	 * is ignored and anything else is only logged as a warning.
	 *
	 * @param result
	 *            the return value of the script, may be <code>null</code>
	 */
	private void deliverResult(Object result) {
		if (result == null) {
			return;
		}

		if (result instanceof IOObject[]) {
			outExtender.deliver(Arrays.asList((IOObject[]) result));
		} else if (result instanceof Object[]) {
			// an array of another runtime type (e.g. a plain Object[]) cannot be cast to
			// IOObject[], so its elements are checked one by one just like list elements
			outExtender.deliver(filterIOObjects(Arrays.asList((Object[]) result)));
		} else if (result instanceof List) {
			outExtender.deliver(filterIOObjects((List<?>) result));
		} else if (result instanceof IOObject) {
			outExtender.deliver(Collections.singletonList((IOObject) result));
		} else {
			getLogger().warning("Unknown result: " + result.getClass() + ": " + result);
		}
	}

	/**
	 * Collects all elements of the given list which are {@link IOObject}s, keeping their order, and
	 * logs a warning for every other element.
	 *
	 * @param candidates
	 *            the elements returned by the script
	 * @return a new list which contains only the {@link IOObject} elements
	 */
	private List<IOObject> filterIOObjects(List<?> candidates) {
		List<IOObject> results = new LinkedList<IOObject>();
		for (Object single : candidates) {
			if (single instanceof IOObject) {
				results.add((IOObject) single);
			} else {
				getLogger().warning("Unknown result type: " + single);
			}
		}
		return results;
	}

	/**
	 * Adds the script text parameter and the flag for the standard imports to the parameter types
	 * of the super class.
	 *
	 * @return the parameter types of this operator
	 */
	@Override
	public List<ParameterType> getParameterTypes() {
		List<ParameterType> types = super.getParameterTypes();

		ParameterType type = new ParameterTypeText(PARAMETER_SCRIPT, "The script to execute.", TextType.GROOVY, false);
		type.setExpert(false);
		type.setDefaultValue("/* \n"
				+ " * You can use both Java and Groovy syntax in this script.\n"
				+ " * \n * Note that you have access to the following two predefined variables:\n"
				+ " * 1) input (an array of all input data)\n"
				+ " * 2) operator (the operator instance which is running this script)\n"
				+ " */\n"
				+ "\n"
				+ "// Take first input data and treat it as generic IOObject\n"
				+ "// Alternatively, you could treat it as an ExampleSet if it is one:\n"
				+ "// ExampleSet inputData = input[0];\n"
				+ "IOObject inputData = input[0];\n"
				+ "\n\n"
				+ "// You can add any code here\n"
				+ "\n"
				+ "\n"
				+ "// This line returns the first input as the first output\n"
				+ "return inputData;");
		types.add(type);

		types.add(new ParameterTypeBoolean(PARAMETER_STANDARD_IMPORTS,
				"Indicates if standard imports for examples and attributes etc. should be automatically generated.",
				true));
		return types;
	}
}