Java tutorial: the HydrographRuntime class (hydrograph.engine.cascading.integration)
/*******************************************************************************
 * Copyright 2017 Capital One Services, LLC and Bitwise, Inc.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License
 *******************************************************************************/
package hydrograph.engine.cascading.integration;

import cascading.cascade.Cascade;
import cascading.flow.Flow;
import cascading.property.AppProps;
import hydrograph.engine.component.mapping.ComponentAdapterFactory;
import hydrograph.engine.core.core.HydrographJob;
import hydrograph.engine.core.core.HydrographRuntimeService;
import hydrograph.engine.core.flowmanipulation.FlowManipulationContext;
import hydrograph.engine.core.flowmanipulation.FlowManipulationHandler;
import hydrograph.engine.core.helper.JAXBTraversal;
import hydrograph.engine.core.props.PropertiesLoader;
import hydrograph.engine.core.schemapropagation.SchemaFieldHandler;
import hydrograph.engine.core.utilities.CommandLineOptionsProcessor;
import hydrograph.engine.core.utilities.GeneralUtilities;
import hydrograph.engine.flow.utils.ExecutionTrackingListener;
import hydrograph.engine.hadoop.utils.HadoopConfigProvider;
import hydrograph.engine.jaxb.commontypes.TypeProperties.Property;
import hydrograph.engine.utilities.ExecutionTrackingUtilities;
import hydrograph.engine.utilities.HiveMetastoreTokenProvider;
import hydrograph.engine.utilities.UserClassLoader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Properties;

/**
 * Cascading-based implementation of {@link HydrographRuntimeService}: builds a
 * Hydrograph job into Cascading flows, runs them, and exposes execution
 * tracking status.
 */
@SuppressWarnings({ "rawtypes" })
public class HydrographRuntime implements HydrographRuntimeService {

    final String EXECUTION_TRACKING = "hydrograph.execution.tracking";
    private static final String OPTION_DOT_PATH = "dotpath";
    private Properties hadoopProperties = new Properties();
    private ExecutionTrackingListener executionTrackingListener;
    private FlowBuilder flowBuilder;
    private RuntimeContext runtimeContext;
    private String[] args;
    private PropertiesLoader config;
    private FlowManipulationContext flowManipulationContext;
    private static final Logger LOG = LoggerFactory.getLogger(HydrographRuntime.class);

    /**
     * Runs the full job lifecycle: initialize, prepare, execute, cleanup.
     */
    public void executeProcess(String[] args, HydrographJob hydrographJob) {
        this.args = args != null ? args.clone() : null;
        config = PropertiesLoader.getInstance();
        LOG.info("Invoking initialize on runtime service");
        initialize(config.getRuntimeServiceProperties(), this.args, hydrographJob, null, null);
        LOG.info("Preparation started");
        prepareToExecute();
        LOG.info("Preparation completed. Now starting execution");
        LOG.info("Execution Started");
        execute();
        LOG.info("Execution Complete");
        oncomplete();
    }

    public void initialize(Properties config, String[] args, HydrographJob hydrographJob, String jobId,
            String UDFPath) {
        AppProps.setApplicationName(hadoopProperties, hydrographJob.getJAXBObject().getName());
        hadoopProperties.putAll(config);
        Configuration conf = new HadoopConfigProvider(hadoopProperties).getJobConf();
        SchemaFieldHandler schemaFieldHandler = new SchemaFieldHandler(
                hydrographJob.getJAXBObject().getInputsOrOutputsOrStraightPulls());
        flowManipulationContext = new FlowManipulationContext(hydrographJob, args, schemaFieldHandler, jobId);
        FlowManipulationHandler flowManipulationHandler = new FlowManipulationHandler();
        hydrographJob = flowManipulationHandler.execute(flowManipulationContext);

        // Merge any runtime properties declared in the job definition into the
        // Hadoop properties used for this run.
        if (hydrographJob.getJAXBObject().getRuntimeProperties() != null
                && hydrographJob.getJAXBObject().getRuntimeProperties().getProperty() != null) {
            for (Property property : hydrographJob.getJAXBObject().getRuntimeProperties().getProperty()) {
                hadoopProperties.put(property.getName(), property.getValue());
            }
        }

        // Hive components need a metastore delegation token in secure clusters.
        JAXBTraversal traversal = new JAXBTraversal(hydrographJob.getJAXBObject());
        if (traversal.isHiveComponentPresentInFlow()) {
            try {
                HiveMetastoreTokenProvider.obtainTokenForHiveMetastore(conf);
            } catch (TException | IOException e) {
                throw new HydrographRuntimeException(e);
            }
        }

        // Let Hadoop consume its generic options; keep only the remaining args.
        String[] otherArgs;
        try {
            otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        } catch (IOException e) {
            throw new HydrographRuntimeException(e);
        }
        StringBuilder argsString = new StringBuilder();
        for (String arg : otherArgs) {
            argsString.append(" ").append(arg);
        }
        LOG.info("After processing, arguments are:" + argsString);
        this.args = otherArgs;
        // setJar(otherArgs);
        hadoopProperties.putAll(conf.getValByRegex(".*"));

        ComponentAdapterFactory componentAdapterFactory = new ComponentAdapterFactory(
                hydrographJob.getJAXBObject());
        flowBuilder = new FlowBuilder();
        runtimeContext = new RuntimeContext(hydrographJob, traversal, hadoopProperties, componentAdapterFactory,
                flowManipulationContext.getSchemaFieldHandler(), UDFPath);
        LOG.info("Graph '" + runtimeContext.getHydrographJob().getJAXBObject().getName()
                + "' initialized successfully");
    }

    @Override
    public void prepareToExecute() {
        flowBuilder.buildFlow(runtimeContext);
        if (GeneralUtilities.IsArgOptionPresent(args, OPTION_DOT_PATH)) {
            writeDotFiles();
        }
    }

    @Override
    public void execute() {
        if (GeneralUtilities.IsArgOptionPresent(args, CommandLineOptionsProcessor.OPTION_NO_EXECUTION)) {
            LOG.info(CommandLineOptionsProcessor.OPTION_NO_EXECUTION + " option is provided, so skipping execution");
            return;
        }
        // Attach the execution tracking listener, if one is configured.
        if (ExecutionTrackingUtilities.getExecutionTrackingClass(EXECUTION_TRACKING) != null) {
            executionTrackingListener = (ExecutionTrackingListener) UserClassLoader.loadAndInitClass(
                    ExecutionTrackingUtilities.getExecutionTrackingClass(EXECUTION_TRACKING), "execution tracking");
            executionTrackingListener.addListener(runtimeContext);
        }
        // Run each Cascade to completion.
        for (Cascade cascade : runtimeContext.getCascade()) {
            cascade.complete();
        }
    }

    @Override
    public void oncomplete() {
        flowBuilder.cleanup(flowManipulationContext.getTmpPath(), runtimeContext);
    }

    /**
     * Returns the statistics of components in a job.
     *
     * @see hydrograph.engine.execution.tracking.ComponentInfo
     */
    @Override
    public Object getExecutionStatus() {
        if (executionTrackingListener != null)
            return executionTrackingListener.getStatus();
        return null;
    }

    public Cascade[] getFlow() {
        return runtimeContext.getCascadingFlows();
    }

    /**
     * Writes DOT files for every flow and its steps under the path given with
     * the "dotpath" option.
     */
    private void writeDotFiles() {
        String[] paths = GeneralUtilities.getArgsOption(args, OPTION_DOT_PATH);
        if (paths == null) {
            throw new HydrographRuntimeException(
                    OPTION_DOT_PATH + " option is provided but is not followed by a path");
        }
        String basePath = paths[0];
        LOG.info("Dot files will be written under " + basePath);
        String flowDotPath = basePath + "/" + runtimeContext.getHydrographJob().getJAXBObject().getName() + "/flow";
        String flowStepDotPath = basePath + "/" + runtimeContext.getHydrographJob().getJAXBObject().getName()
                + "/flowstep";
        int batchCounter = 0;
        for (Cascade cascadingFlow : runtimeContext.getCascadingFlows()) {
            for (Flow flow : cascadingFlow.getFlows()) {
                flow.writeDOT(flowDotPath + "_" + batchCounter);
                flow.writeStepsDOT(flowStepDotPath + "_" + batchCounter);
                batchCounter++;
            }
        }
    }

    public class HydrographRuntimeException extends RuntimeException {

        private static final long serialVersionUID = -7891832980227676974L;

        public HydrographRuntimeException(String msg) {
            super(msg);
        }

        public HydrographRuntimeException(Throwable e) {
            super(e);
        }
    }

    /**
     * Method to kill the job: stops every running Cascade, then exits.
     */
    @Override
    public void kill() {
        LOG.info("Kill signal received");
        if (runtimeContext.getCascade() != null) {
            for (Cascade cascade : runtimeContext.getCascade()) {
                LOG.info("Killing Cascading jobs: " + cascade.getID());
                cascade.stop();
            }
        } else {
            LOG.info("No Cascading jobs present to kill. Exiting code.");
        }
        System.exit(0);
    }
}
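
For readers who want to drive this class directly, here is a minimal, hypothetical driver sketch. It is not Hydrograph's actual entry point: loadJobFromXml is a placeholder for whatever builder your project uses to turn a job definition into a HydrographJob, and executeProcess additionally expects the usual Hydrograph configuration on the classpath, since it resolves its runtime properties through PropertiesLoader.getInstance().

package hydrograph.engine.examples;

import hydrograph.engine.cascading.integration.HydrographRuntime;
import hydrograph.engine.core.core.HydrographJob;

public class RuntimeDriver {

    public static void main(String[] args) {
        // Assumption: some project-specific builder materializes a HydrographJob
        // from a job XML; this helper is hypothetical, not a Hydrograph API.
        HydrographJob job = loadJobFromXml("sampleJob.xml");

        HydrographRuntime runtime = new HydrographRuntime();
        // executeProcess() chains initialize(), prepareToExecute(), execute()
        // and oncomplete() in order; passing "-dotpath" also makes the runtime
        // write DOT files for each flow during preparation.
        runtime.executeProcess(new String[] { "-dotpath", "/tmp/dot" }, job);
    }

    // Hypothetical placeholder -- substitute your job-construction code
    // (e.g. JAXB unmarshalling of the job XML).
    private static HydrographJob loadJobFromXml(String path) {
        throw new UnsupportedOperationException("wire up a job loader here");
    }
}

Note that executeProcess clones the argument array before use, so the caller's array is never mutated, and that kill() calls System.exit(0), so it should only be invoked from a process dedicated to the job.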