Java tutorial
/**
 * Copyright 2014-2015 SHAF-WORK
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.shaf.core.process.config;

import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.TimeUnit;

import org.apache.commons.configuration.Configuration;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.shaf.core.process.type.dist.DistributedProcess;
import org.shaf.core.util.Log;

import com.google.common.base.Objects;
import com.google.common.base.Objects.ToStringHelper;
import com.google.common.base.Strings;

/**
 * The process configuration.
 * 
 * <p>
 * Extends {@link PropertiesConfiguration} with typed accessors for the
 * properties used by processes. List-delimiter parsing of the underlying
 * configuration is disabled, because "composite" properties (for example the
 * comma-separated list of distributed inputs) are stored as a single string
 * and parsed by this class itself.
 * 
 * @author Mykola Galushka
 */
public class ProcessConfiguration extends PropertiesConfiguration {

	/**
	 * Defines a logger.
	 */
	private static final Log LOG = Log.forClass(ProcessConfiguration.class);

	/**
	 * The property name to define the data provider prefix.
	 */
	private static final String DATA_PROVIDER_PREFIX = "shaf.data.provider";

	/**
	 * The property name to define the data consumer prefix.
	 */
	private static final String DATA_CONSUMER_PREFIX = "shaf.data.consumer";

	/**
	 * The property name to define the process base directory.
	 */
	public static final String PROCESS_BASE = "shaf.process.base";

	/**
	 * The property name to define the process execution timeout.
	 */
	public static final String PROCESS_EXECUTION_TIMEOUT = "shaf.process.execution.timeout";

	/**
	 * The property name to define the process execution timeout unit.
	 */
	public static final String PROCESS_EXECUTION_TIMEOUT_UNIT = "shaf.process.execution.timeout.unit";

	/**
	 * The property name to define the distributed process input directories.
	 */
	public static final String PROCESS_DISTRIBUTED_INPUTS = "shaf.process.dist.inputs";

	/**
	 * The property name to define the process execution output directories.
	 */
	public static final String PROCESS_DISTRIBUTED_OUTPUT = "shaf.process.dist.output";

	/**
	 * The property name to define the flag which forces the emulation mode even
	 * if Hadoop environment is available.
	 */
	public static final String FORCE_EMULATION_MODE = "shaf.force.emulation.mode";

	/**
	 * The property name to define the flag which forces deletion of the output
	 * directory before running {@link DistributedProcess}.
	 */
	public static final String FORCE_DELETE_OUTPUT = "shaf.force.delete.output";

	/**
	 * The property name to define the maximum buffer length for generic I/O
	 * operations.
	 */
	public static final String IO_MAX_BUFFER_LENGTH = "shaf.io.max.buffer.length";

	/**
	 * Constructs a new process configuration as a copy of the specified one.
	 * 
	 * @param config
	 *            the base configuration.
	 */
	public ProcessConfiguration(final Configuration config) {
		super();

		// This configuration provides its own mechanisms for parsing
		// "composite" properties. The original parsing must be disabled
		// BEFORE any value is stored, otherwise it has no effect on them.
		super.setDelimiterParsingDisabled(true);

		this.copy(config);

		// Logs properties which were copied.
		this.logProperties("Copies: ");

		// Sets the properties, which must be present in configuration.
		this.setDefaultProperties();
	}

	/**
	 * Constructs a new process configuration loaded from the specified file.
	 * 
	 * <p>
	 * The file is loaded with list-delimiter parsing already disabled, so a
	 * value such as {@code a,b} is kept as one string.
	 * 
	 * @param path
	 *            the path to the configuration file.
	 * @throws ConfigurationException
	 *             if error occurs during configuration load.
	 */
	public ProcessConfiguration(final String path)
			throws ConfigurationException {
		super();

		// The delimiter flag is only honoured by parsing that happens after
		// it is set, so it has to be switched off before the file is loaded
		// (the one-argument super constructor would load the file first).
		super.setDelimiterParsingDisabled(true);
		super.setFileName(path);
		super.load();

		// Logs properties which were loaded.
		this.logProperties("Loads: ");

		// Sets the properties, which must be present in configuration.
		this.setDefaultProperties();
	}

	/**
	 * Constructs a new default process configuration.
	 */
	public ProcessConfiguration() {
		super();

		// This configuration provides its own mechanisms for parsing
		// "composite" properties. So original parsing should be disabled.
		super.setDelimiterParsingDisabled(true);

		// Sets the properties, which must be present in configuration.
		this.setDefaultProperties();
	}

	/**
	 * Writes every property currently held by this configuration to the debug
	 * log.
	 * 
	 * @param prefix
	 *            the text placed in front of each {@code key=value} pair.
	 */
	private void logProperties(final String prefix) {
		Iterator<String> keys = this.getKeys();
		while (keys.hasNext()) {
			String key = keys.next();
			LOG.debug(prefix + key + "=" + this.getProperty(key));
		}
	}

	/**
	 * Validates a path argument.
	 * 
	 * @param path
	 *            the path to validate.
	 * @throws NullPointerException
	 *             if the path is {@code null}.
	 * @throws IllegalArgumentException
	 *             if the path is empty.
	 */
	private static void requirePath(final String path) {
		if (Strings.isNullOrEmpty(path)) {
			if (path == null) {
				throw new NullPointerException("path");
			}
			throw new IllegalArgumentException("The path is empty.");
		}
	}

	/**
	 * Tests if the specified property is defined.
	 * 
	 * @param key
	 *            the property key to test.
	 * @return {@code true} if the property is defined and {@code false}
	 *         otherwise.
	 */
	private boolean isPropertyDefined(final String key) {
		return super.containsKey(key);
	}

	/**
	 * Sets the property and writes a trace record telling whether an existing
	 * property was updated or a new one was added.
	 */
	@Override
	public void setProperty(String key, Object value) {
		if (this.isPropertyDefined(key)) {
			LOG.trace("Set property: " + key + "=" + value);
		} else {
			LOG.trace("Add property: " + key + "=" + value);
		}
		super.setProperty(key, value);
	}

	/**
	 * Searches and returns property by its key.
	 * 
	 * @param key
	 *            the property key to search.
	 * @return the property value.
	 * @throws PropertyNotFoundException
	 *             if the specified property is not found.
	 */
	private Object findProperty(final String key)
			throws PropertyNotFoundException {
		if (!this.isPropertyDefined(key)) {
			throw new PropertyNotFoundException(key);
		}
		return super.getProperty(key);
	}

	/**
	 * Sets the default properties in constructors. Only properties which are
	 * not defined yet receive a default value.
	 */
	private void setDefaultProperties() {

		// Sets the "force emulation mode" property flag if it is not found.
		if (!this.isPropertyDefined(FORCE_EMULATION_MODE)) {
			LOG.warn("The '" + FORCE_EMULATION_MODE
					+ "' property is not defined.");
			setForceEmulatorMode(false);
		}

		// Sets the "force delete output" property flag if it is not found.
		if (!this.isPropertyDefined(FORCE_DELETE_OUTPUT)) {
			LOG.warn("The '" + FORCE_DELETE_OUTPUT
					+ "' property is not defined.");
			setForceDeleteOutput(true);
		}

		// Sets the process base if it is not found.
		if (!this.isPropertyDefined(PROCESS_BASE)) {
			LOG.warn("The '" + PROCESS_BASE + "' property is not defined.");
			this.setBase(System.getProperty("java.io.tmpdir"));
		}

		// Sets the max duration for process execution if it is not found.
		// The value and its unit are only meaningful together, so both are
		// reset when either of them is missing.
		if (!this.isPropertyDefined(PROCESS_EXECUTION_TIMEOUT)
				|| !this.isPropertyDefined(PROCESS_EXECUTION_TIMEOUT_UNIT)) {
			LOG.warn("The '" + PROCESS_EXECUTION_TIMEOUT + "' or '"
					+ PROCESS_EXECUTION_TIMEOUT_UNIT
					+ "' property is not defined.");
			this.setTimeout(1, TimeUnit.HOURS);
		}

		// Sets the default maximum buffer length for the generic I/O if it is
		// not found.
		if (!this.isPropertyDefined(IO_MAX_BUFFER_LENGTH)) {
			LOG.warn("The '" + IO_MAX_BUFFER_LENGTH
					+ "' property is not defined.");
			this.setIOMaxBufferLength(10240L);
		}
	}

	/**
	 * Returns properties associated with the specified data provider.
	 * 
	 * @param name
	 *            the data provider name.
	 * @return the data provider properties.
	 * @throws PropertyNotFoundException
	 *             if the specified property is not found.
	 */
	public final Properties getDataProvider(final String name)
			throws PropertyNotFoundException {
		return this.getProperties(DATA_PROVIDER_PREFIX + "." + name);
	}

	/**
	 * Returns properties associated with the specified data consumer.
	 * 
	 * @param name
	 *            the data consumer name.
	 * @return the data consumer properties.
	 * @throws PropertyNotFoundException
	 *             if the specified property is not found.
	 */
	public final Properties getDataConsumer(final String name)
			throws PropertyNotFoundException {
		return this.getProperties(DATA_CONSUMER_PREFIX + "." + name);
	}

	// ------------------------------------------------------------------------

	/**
	 * Sets the flag which forces the emulation mode, even if the Hadoop
	 * environment is available.
	 * 
	 * @param flag
	 *            the flag value to set. {@code true} indicates that emulation
	 *            mode is enforced and {@code false} otherwise.
	 * @return the reference to itself.
	 */
	public final ProcessConfiguration setForceEmulatorMode(final boolean flag) {
		this.setProperty(FORCE_EMULATION_MODE, Boolean.toString(flag));
		return this;
	}

	/**
	 * Returns the flag which forces the emulation mode, even if the Hadoop
	 * environment is available.
	 * 
	 * @return {@code true} if emulation mode is enforced and {@code false}
	 *         otherwise (including the case when the property is missing).
	 */
	public final boolean isForceEmulatorMode() {
		try {
			return Boolean
					.parseBoolean((String) findProperty(FORCE_EMULATION_MODE));
		} catch (PropertyNotFoundException exc) {
			LOG.warn(
					"The property is not defined. The method returns the default value: false",
					exc);
			return false;
		}
	}

	/**
	 * Sets the flag which forces deletion of the output directory before
	 * running {@link DistributedProcess}.
	 * 
	 * @param flag
	 *            the flag value to set. {@code true} if deletion of the output
	 *            directory before running distributed process is enforced and
	 *            {@code false} otherwise.
	 * @return the reference to itself.
	 */
	public final ProcessConfiguration setForceDeleteOutput(final boolean flag) {
		this.setProperty(FORCE_DELETE_OUTPUT, Boolean.toString(flag));
		return this;
	}

	/**
	 * Returns the flag which forces deletion of the output directory before
	 * running {@link DistributedProcess}.
	 * 
	 * @return {@code true} if deletion of the output directory before running
	 *         distributed process is enforced and {@code false} otherwise
	 *         (including the case when the property is missing).
	 */
	public final boolean isForceDeleteOutput() {
		try {
			return Boolean
					.parseBoolean((String) findProperty(FORCE_DELETE_OUTPUT));
		} catch (PropertyNotFoundException exc) {
			LOG.warn(
					"The property is not defined. The method returns the default value: false",
					exc);
			return false;
		}
	}

	/**
	 * Sets the base directory.
	 * 
	 * @param path
	 *            the base directory path to set.
	 * @return the reference to itself.
	 */
	public final ProcessConfiguration setBase(final String path) {
		this.setProperty(PROCESS_BASE, path);
		return this;
	}

	/**
	 * Returns the base directory.
	 * 
	 * @return the base directory path.
	 * @throws PropertyNotFoundException
	 *             if property {@link #PROCESS_BASE} is not found.
	 */
	public final String getBase() throws PropertyNotFoundException {
		return (String) findProperty(PROCESS_BASE);
	}

	/**
	 * Sets the process execution timeout.
	 * 
	 * @param timeout
	 *            the timeout to set; must be positive.
	 * @param timeunit
	 *            the time unit to set.
	 * @return the reference to itself.
	 * @throws IllegalArgumentException
	 *             if the timeout is not greater than zero.
	 * @throws NullPointerException
	 *             if the time unit is {@code null}.
	 */
	public final ProcessConfiguration setTimeout(final long timeout,
			final TimeUnit timeunit) {
		if (timeout <= 0) {
			throw new IllegalArgumentException(
					"The process execution timeout should be > 0, "
							+ "but the current value: " + timeout);
		}
		if (timeunit == null) {
			throw new NullPointerException("timeunit");
		}

		this.setProperty(PROCESS_EXECUTION_TIMEOUT, String.valueOf(timeout));
		// Locale.ROOT keeps the stored name independent of the default
		// locale (e.g. the Turkish dotless "i" would corrupt "MINUTES").
		this.setProperty(PROCESS_EXECUTION_TIMEOUT_UNIT, timeunit.name()
				.toLowerCase(Locale.ROOT));
		return this;
	}

	/**
	 * Returns the process execution timeout.
	 * 
	 * @return the process execution timeout.
	 * @throws PropertyNotFoundException
	 *             if property {@link #PROCESS_EXECUTION_TIMEOUT} is not found.
	 * @throws PropertyInvalidTypeException
	 *             if property {@link #PROCESS_EXECUTION_TIMEOUT} type is not
	 *             {@code long}.
	 */
	public final long getTimeout() throws PropertyNotFoundException,
			PropertyInvalidTypeException {
		try {
			return Long
					.parseLong((String) findProperty(PROCESS_EXECUTION_TIMEOUT));
		} catch (NumberFormatException exc) {
			throw new PropertyInvalidTypeException(PROCESS_EXECUTION_TIMEOUT,
					exc);
		}
	}

	/**
	 * Returns the process execution timeout unit.
	 * 
	 * @return the process execution timeout unit.
	 * @throws PropertyNotFoundException
	 *             if property {@link #PROCESS_EXECUTION_TIMEOUT_UNIT} is not
	 *             found.
	 * @throws PropertyInvalidTypeException
	 *             if property {@link #PROCESS_EXECUTION_TIMEOUT_UNIT} type is
	 *             not {@code TimeUnit}.
	 */
	public final TimeUnit getTimeoutUnit() throws PropertyNotFoundException,
			PropertyInvalidTypeException {
		try {
			// Locale.ROOT: enum constant names must be matched regardless of
			// the default locale's case-mapping rules.
			return TimeUnit
					.valueOf(((String) findProperty(PROCESS_EXECUTION_TIMEOUT_UNIT))
							.toUpperCase(Locale.ROOT));
		} catch (IllegalArgumentException | NullPointerException exc) {
			throw new PropertyInvalidTypeException(
					PROCESS_EXECUTION_TIMEOUT_UNIT, exc);
		}
	}

	/**
	 * Sets the maximum buffer length for the generic I/O operations.
	 * 
	 * @param length
	 *            the maximum buffer length to set.
	 * @return the reference to itself.
	 */
	public final ProcessConfiguration setIOMaxBufferLength(final long length) {
		this.setProperty(IO_MAX_BUFFER_LENGTH, String.valueOf(length));
		return this;
	}

	/**
	 * Returns the maximum buffer length for the generic I/O operations.
	 * 
	 * @return the maximum buffer length.
	 * @throws PropertyNotFoundException
	 *             if property {@link #IO_MAX_BUFFER_LENGTH} is not found.
	 * @throws NumberFormatException
	 *             if the stored value is not a parsable {@code long}.
	 */
	public final long getIOMaxBufferLength() throws PropertyNotFoundException {
		return Long.parseLong((String) findProperty(IO_MAX_BUFFER_LENGTH));
	}

	/**
	 * Adds an input directory with assigned mapper.
	 * 
	 * <p>
	 * The first call fixes the input mode: a {@code null} mapper class selects
	 * the single-format mode, a non-{@code null} one selects the multi-format
	 * mode. Subsequent calls must stay in the same mode.
	 * 
	 * @param path
	 *            the path to input data.
	 * @param cls
	 *            the distributed process class to perform mapping, or
	 *            {@code null} if no mapper is assigned.
	 * @return the reference to itself.
	 * @throws InputModeAlreadySetException
	 *             if attempt to change the input mode.
	 * @throws PropertyNotFoundException
	 *             if property {@link #PROCESS_DISTRIBUTED_INPUTS} is not found.
	 * @throws NullPointerException
	 *             if the path is {@code null}.
	 * @throws IllegalArgumentException
	 *             if the path is empty.
	 */
	public final ProcessConfiguration addInput(final String path,
			final Class<? extends DistributedProcess<?, ?, ?, ?, ?, ?>> cls)
			throws InputModeAlreadySetException, PropertyNotFoundException {
		requirePath(path);

		// The type is resolved once; every resolution re-reads the property
		// and logs a warning when it is missing.
		final InputType type = this.getInputType();
		final boolean defined = (type != InputType.UNKNOWN);

		if (defined) {
			boolean conflict = (type == InputType.MEMORY)
					|| (type == InputType.DATABASE)
					|| (type == InputType.MULTI_FORMAT_PATH && cls == null)
					|| (type == InputType.SINGLE_FORMAT_PATH && cls != null);
			if (conflict) {
				throw new InputModeAlreadySetException(type);
			}
		}

		StringBuilder value = new StringBuilder();
		if (defined) {
			value.append(this.getInput()).append(',');
		}
		value.append(path);
		if (cls != null) {
			value.append(';').append(cls.getCanonicalName());
		}

		this.setProperty(PROCESS_DISTRIBUTED_INPUTS, value.toString());
		return this;
	}

	/**
	 * Adds an input directory without an assigned mapper.
	 * 
	 * @param path
	 *            the path to input data.
	 * @return the reference to itself.
	 * @throws InputModeAlreadySetException
	 *             if attempt to change the input mode.
	 * @throws PropertyNotFoundException
	 *             if property {@link #PROCESS_DISTRIBUTED_INPUTS} is not found.
	 */
	public final ProcessConfiguration addInput(final String path)
			throws InputModeAlreadySetException, PropertyNotFoundException {
		return this.addInput(path, null);
	}

	/**
	 * Returns input directories as a {@code String}. In the <b>multi-map
	 * mode</b> all directories are returned with assigned mappers.
	 * 
	 * <p>
	 * The input string in the <b>single-map mode</b> has the following format:
	 * 
	 * <pre>
	 * path_1,path_2,...,path_N
	 * </pre>
	 * 
	 * <p>
	 * The input string in the <b>multi-map mode</b> has the following format:
	 * 
	 * <pre>
	 * path_1;map_1,path_2;map_2,...,path_N;map_N
	 * </pre>
	 * 
	 * <p>
	 * where <i>path_i</i> is a relative path to the directory with mapping data
	 * and <i>map_i</i> is a mapper canonical class name which assigned to
	 * perform a mapping operation.
	 * 
	 * @return the trimmed string with input directories.
	 * @throws PropertyNotFoundException
	 *             if property {@link #PROCESS_DISTRIBUTED_INPUTS} is not found.
	 */
	public final String getInput() throws PropertyNotFoundException {
		return ((String) findProperty(PROCESS_DISTRIBUTED_INPUTS)).trim();
	}

	/**
	 * Returns input directories as a {@code String[]}. In the <b>multi-map
	 * mode</b> all directories are returned with assigned mappers.
	 * 
	 * @return the array with input directories; empty if the input string is
	 *         empty.
	 * @throws PropertyNotFoundException
	 *             if property {@link #PROCESS_DISTRIBUTED_INPUTS} is not found.
	 * @see #getInput()
	 */
	public final String[] getInputAsArray() throws PropertyNotFoundException {
		String input = this.getInput();
		// Splitting an empty string would yield one empty element.
		return input.isEmpty() ? new String[0] : input.split(",");
	}

	/**
	 * Returns a tokenizer which iterates over the input entries.
	 * 
	 * @return the input tokenizer.
	 * @throws PropertyNotFoundException
	 *             if property {@link #PROCESS_DISTRIBUTED_INPUTS} is not found.
	 * @see #getInput()
	 */
	public final InputTokenizer getInputTokenizer()
			throws PropertyNotFoundException {
		return new InputTokenizer(this.getInput());
	}

	/**
	 * Returns the map with input directories. In the <b>multi-map mode</b> all
	 * directories are returned with assigned mappers. The {@code key}
	 * represents an input path and {@code value} represents an assigned mapper
	 * (in multi-map mode). The value is {@code null} when no mapper is
	 * assigned to the path.
	 * 
	 * @return the map with input directories.
	 * @throws PropertyNotFoundException
	 *             if property {@link #PROCESS_DISTRIBUTED_INPUTS} is not found.
	 * @see #getInput()
	 */
	public final Map<String, String> getInputAsMap()
			throws PropertyNotFoundException {
		String[] inputs = this.getInputAsArray();
		// Loop-invariant: resolved once instead of once per entry.
		boolean multi = (this.getInputType() == InputType.MULTI_FORMAT_PATH);

		Map<String, String> lookup = new HashMap<>();
		for (String input : inputs) {
			if (multi) {
				String[] pair = input.split(";");
				// An entry without ";" has no mapper assigned.
				lookup.put(pair[0], (pair.length > 1) ? pair[1] : null);
			} else {
				lookup.put(input, null);
			}
		}
		return lookup;
	}

	/**
	 * Sets the output directory.
	 * 
	 * @param path
	 *            the path to output data.
	 * @return the reference to itself.
	 * @throws OutputModeAlreadySetException
	 *             if attempt to change the output mode.
	 * @throws PropertyNotFoundException
	 *             if property {@link #PROCESS_DISTRIBUTED_OUTPUT} is not found.
	 * @throws NullPointerException
	 *             if the path is {@code null}.
	 * @throws IllegalArgumentException
	 *             if the path is empty.
	 */
	public final ProcessConfiguration setOutput(final String path)
			throws OutputModeAlreadySetException, PropertyNotFoundException {
		requirePath(path);

		final OutputType type = this.getOutputType();
		if (type != OutputType.UNKNOWN) {
			throw new OutputModeAlreadySetException(type);
		}

		this.setProperty(PROCESS_DISTRIBUTED_OUTPUT, path);
		return this;
	}

	/**
	 * Returns the output directory.
	 * 
	 * @return the output directory.
	 * @throws PropertyNotFoundException
	 *             if property {@link #PROCESS_DISTRIBUTED_OUTPUT} is not found.
	 */
	public final String getOutput() throws PropertyNotFoundException {
		return (String) this.findProperty(PROCESS_DISTRIBUTED_OUTPUT);
	}

	/**
	 * Returns the input type.
	 * 
	 * @return the input type; {@link InputType#UNKNOWN} if the
	 *         {@link #PROCESS_DISTRIBUTED_INPUTS} property is not defined or
	 *         empty.
	 */
	public final InputType getInputType() {
		String input;
		try {
			input = this.getInput();
		} catch (PropertyNotFoundException exc) {
			LOG.warn(
					"The property is not defined. The method returns the default value: "
							+ InputType.UNKNOWN, exc);
			return InputType.UNKNOWN;
		}

		// An empty value carries no information about the input.
		if (input.isEmpty()) {
			return InputType.UNKNOWN;
		}

		// The indicator of the memory based input is the presence of the
		// prefix "#" in the PROCESS_DISTRIBUTED_INPUTS property.
		if (input.charAt(0) == '#') {
			return InputType.MEMORY;
		}

		// The delimiter character ";" indicates that an input entry is split
		// on a data path and a mapper class. Such input can only be provided
		// for the multi-mapping mode.
		if (input.indexOf(';') >= 0) {
			return InputType.MULTI_FORMAT_PATH;
		}
		return InputType.SINGLE_FORMAT_PATH;

		// TODO: add implementation for defining the database input type;
	}

	/**
	 * Test if the input is defined. This requires the input type to be not
	 * equal to {@link InputType#UNKNOWN}.
	 * 
	 * @return {@code true} if the input is defined and {@code false} otherwise.
	 */
	public final boolean isInputDefined() {
		return this.getInputType() != InputType.UNKNOWN;
	}

	/**
	 * Test if the input format type is {@link InputType#SINGLE_FORMAT_PATH}.
	 * 
	 * @return {@code true} if the input format type is
	 *         {@link InputType#SINGLE_FORMAT_PATH} and {@code false} otherwise.
	 */
	public final boolean isSingleFormatPathInput() {
		return this.getInputType() == InputType.SINGLE_FORMAT_PATH;
	}

	/**
	 * Test if the input format type is {@link InputType#MULTI_FORMAT_PATH}.
	 * 
	 * @return {@code true} if the input format type is
	 *         {@link InputType#MULTI_FORMAT_PATH} and {@code false} otherwise.
	 */
	public final boolean isMultiFormatPathInput() {
		return this.getInputType() == InputType.MULTI_FORMAT_PATH;
	}

	/**
	 * Test if the input format type is {@link InputType#DATABASE}.
	 * 
	 * @return {@code true} if the input format type is
	 *         {@link InputType#DATABASE} and {@code false} otherwise.
	 */
	public final boolean isDatabaseInput() {
		return this.getInputType() == InputType.DATABASE;
	}

	/**
	 * Test if the input format type is {@link InputType#MEMORY}.
	 * 
	 * @return {@code true} if the input format type is {@link InputType#MEMORY}
	 *         and {@code false} otherwise.
	 */
	public final boolean isMemoryInput() {
		return this.getInputType() == InputType.MEMORY;
	}

	/**
	 * Returns the output type.
	 * 
	 * @return the output type; {@link OutputType#UNKNOWN} if the
	 *         {@link #PROCESS_DISTRIBUTED_OUTPUT} property is not defined or
	 *         empty.
	 */
	public final OutputType getOutputType() {
		String output;
		try {
			output = this.getOutput();
		} catch (PropertyNotFoundException exc) {
			LOG.warn(
					"The property is not defined. The method returns the default value: "
							+ OutputType.UNKNOWN, exc);
			return OutputType.UNKNOWN;
		}

		// An empty value carries no information about the output.
		if (output.isEmpty()) {
			return OutputType.UNKNOWN;
		}

		// The indicator of the memory based output is the presence of the
		// prefix "#" in the PROCESS_DISTRIBUTED_OUTPUT property.
		return (output.charAt(0) == '#') ? OutputType.MEMORY
				: OutputType.PATH;
	}

	/**
	 * Test if the output is defined. This requires the output type to be not
	 * equal to {@link OutputType#UNKNOWN}.
	 * 
	 * @return {@code true} if the output is defined and {@code false}
	 *         otherwise.
	 */
	public final boolean isOutputDefined() {
		return this.getOutputType() != OutputType.UNKNOWN;
	}

	/**
	 * Test if the output format type is {@link OutputType#PATH}.
	 * 
	 * @return {@code true} if the output format type is {@link OutputType#PATH}
	 *         and {@code false} otherwise.
	 */
	public final boolean isPathOutput() {
		return this.getOutputType() == OutputType.PATH;
	}

	/**
	 * Test if the output format type is {@link OutputType#MEMORY}.
	 * 
	 * @return {@code true} if the output format type is
	 *         {@link OutputType#MEMORY} and {@code false} otherwise.
	 */
	public final boolean isMemoryOutput() {
		return this.getOutputType() == OutputType.MEMORY;
	}

	/**
	 * Returns the {@code ProcessConfiguration} as a string listing every
	 * property with its value.
	 */
	@Override
	public String toString() {
		ToStringHelper helper = Objects.toStringHelper(this);
		Iterator<String> keys = super.getKeys();
		while (keys.hasNext()) {
			String key = keys.next();
			helper.add(key, super.getProperty(key));
		}
		return helper.toString();
	}

	/**
	 * The input type.
	 * 
	 * @author Mykola Galushka
	 */
	public enum InputType {
		/**
		 * Defines an unknown type.
		 */
		UNKNOWN("unknown"),

		/**
		 * Defines a single-map input type, where the data source are files of
		 * the same format.
		 */
		SINGLE_FORMAT_PATH("single-path"),

		/**
		 * Defines a multi-map input type, where the data source are files of
		 * the different formats.
		 */
		MULTI_FORMAT_PATH("multi-path"),

		/**
		 * Defines a single-map input type, where the data source is database.
		 */
		DATABASE("database"),

		/**
		 * Defines a single-map input type, where the data source is memory.
		 */
		MEMORY("memory");

		/**
		 * The input type label.
		 */
		private final String label;

		/**
		 * Constructs a new input type.
		 * 
		 * @param label
		 *            the input type label.
		 */
		InputType(final String label) {
			this.label = label;
		}

		/**
		 * Returns the input type label.
		 * 
		 * @return the input type label.
		 */
		public final String getLabel() {
			return this.label;
		}
	}

	/**
	 * The output type.
	 * 
	 * @author Mykola Galushka
	 */
	public enum OutputType {
		/**
		 * Defines an unknown type.
		 */
		UNKNOWN("unknown"),

		/**
		 * Defines an output path type.
		 */
		PATH("path"),

		/**
		 * Defines an output memory type.
		 */
		MEMORY("memory");

		/**
		 * The output type label.
		 */
		private final String label;

		/**
		 * Constructs a new output type.
		 * 
		 * @param label
		 *            the output type label.
		 */
		OutputType(final String label) {
			this.label = label;
		}

		/**
		 * Returns the output type label.
		 * 
		 * @return the output type label.
		 */
		public final String getLabel() {
			return this.label;
		}
	}

	/**
	 * The input tokenizer. Walks over the comma-separated input entries and
	 * exposes the path and the optional mapper class name of the current one.
	 * 
	 * @author Mykola Galushka
	 */
	public static class InputTokenizer {

		/**
		 * The input strings.
		 */
		final String[] inputs;

		/**
		 * The next token position.
		 */
		private int pos;

		/**
		 * The current path.
		 */
		private String path;

		/**
		 * The current mapper class name.
		 */
		private String mcls;

		/**
		 * Constructs a new input tokenizer.
		 * 
		 * @param input
		 *            the input string, which needs to be tokenized.
		 */
		private InputTokenizer(final String input) {
			// Splitting an empty string would yield one empty token.
			this.inputs = input.isEmpty() ? new String[0] : input.split(",");
			this.pos = 0;
			this.path = null;
			this.mcls = null;
		}

		/**
		 * Retrieves the next input token.
		 * 
		 * @return {@code true} if the next input token is available and
		 *         {@code false} otherwise.
		 */
		public final boolean nextToken() {
			if (this.pos >= this.inputs.length) {
				this.path = null;
				this.mcls = null;
				return false;
			}

			String[] pair = this.inputs[this.pos++].split(";");
			this.path = pair[0];
			this.mcls = (pair.length > 1) ? pair[1] : null;
			return true;
		}

		/**
		 * Returns the current input path.
		 * 
		 * @return the current input path, or {@code null} if there is no
		 *         current token.
		 */
		public final String getPath() {
			return this.path;
		}

		/**
		 * Returns the current mapper class name.
		 * 
		 * @return the current mapper class name, or {@code null} if the
		 *         current token has no mapper assigned.
		 */
		public final String getMapperClassName() {
			return this.mcls;
		}

		/**
		 * Tests if the process mapper class name defined.
		 * 
		 * @return {@code true} if the process mapper class name defined and
		 *         {@code false} otherwise.
		 */
		public final boolean isMapperClassNameDefined() {
			return this.getMapperClassName() != null;
		}
	}
}