Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.oodt.cas.pge; import org.apache.commons.lang.Validate; import org.apache.oodt.cas.crawl.AutoDetectProductCrawler; import org.apache.oodt.cas.crawl.ProductCrawler; import org.apache.oodt.cas.crawl.StdProductCrawler; import org.apache.oodt.cas.crawl.status.IngestStatus; import org.apache.oodt.cas.crawl.structs.exceptions.CrawlerActionException; import org.apache.oodt.cas.filemgr.structs.exceptions.CatalogException; import org.apache.oodt.cas.filemgr.structs.exceptions.ConnectionException; import org.apache.oodt.cas.filemgr.structs.exceptions.DataTransferException; import org.apache.oodt.cas.metadata.Metadata; import org.apache.oodt.cas.metadata.SerializableMetadata; import org.apache.oodt.cas.metadata.exceptions.CasMetadataException; import org.apache.oodt.cas.metadata.exceptions.MetExtractionException; import org.apache.oodt.cas.metadata.exceptions.MetExtractorConfigReaderException; import org.apache.oodt.cas.metadata.exceptions.NamingConventionException; import org.apache.oodt.cas.metadata.filenaming.PathUtilsNamingConvention; import org.apache.oodt.cas.pge.config.DynamicConfigFile; import org.apache.oodt.cas.pge.config.OutputDir; import org.apache.oodt.cas.pge.config.PgeConfig; import org.apache.oodt.cas.pge.config.RegExprOutputFiles; import org.apache.oodt.cas.pge.config.XmlFilePgeConfigBuilder; import org.apache.oodt.cas.pge.exceptions.PGEException; import org.apache.oodt.cas.pge.metadata.PgeMetadata; import org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys; import org.apache.oodt.cas.pge.staging.FileManagerFileStager; import org.apache.oodt.cas.pge.staging.FileStager; import org.apache.oodt.cas.pge.writers.PcsMetFileWriter; import org.apache.oodt.cas.pge.writers.SciPgeConfigFileWriter; import org.apache.oodt.cas.workflow.exceptions.WorkflowException; import org.apache.oodt.cas.workflow.metadata.CoreMetKeys; import org.apache.oodt.cas.workflow.structs.WorkflowTaskConfiguration; import org.apache.oodt.cas.workflow.structs.WorkflowTaskInstance; import org.apache.oodt.cas.workflow.structs.exceptions.WorkflowTaskInstanceException; import org.apache.oodt.cas.workflow.system.XmlRpcWorkflowManagerClient; import org.apache.oodt.cas.workflow.util.ScriptFile; import org.apache.oodt.commons.exceptions.CommonsException; import org.apache.oodt.commons.exec.ExecUtils; import org.apache.xmlrpc.XmlRpcException; import com.google.common.base.Splitter; import com.google.common.base.Strings; import com.google.common.collect.Lists; import org.springframework.context.support.FileSystemXmlApplicationContext; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.net.MalformedURLException; import java.net.URISyntaxException; import java.net.URL; import java.text.ParseException; import java.util.Date; import java.util.LinkedList; import java.util.List; import java.util.logging.FileHandler; import java.util.logging.Level; import java.util.logging.Logger; import java.util.logging.SimpleFormatter; import java.util.regex.Pattern; import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.*; import static org.apache.oodt.cas.pge.metadata.PgeTaskStatus.*; import static org.apache.oodt.cas.pge.util.GenericPgeObjectFactory.*; /** * Runs a CAS-style Product Generation Executive based on the PCS Wrapper * Architecture from mattmann et al. on OCO. * * @author mattmann (Chris Mattmann) * @author bfoster (Brian Foster) */ public class PGETaskInstance implements WorkflowTaskInstance { protected Logger logger = Logger.getLogger(PGETaskInstance.class.getName()); protected XmlRpcWorkflowManagerClient wm; protected String workflowInstId; protected PgeMetadata pgeMetadata; protected PgeConfig pgeConfig; protected PGETaskInstance() { } @Override public void run(Metadata metadata, WorkflowTaskConfiguration config) throws WorkflowTaskInstanceException { try { // Initialize CAS-PGE. pgeMetadata = createPgeMetadata(metadata, config); pgeConfig = createPgeConfig(); runPropertyAdders(); wm = createWorkflowManagerClient(); workflowInstId = getWorkflowInstanceId(); logger = createLogger(); // use workflow ID specific logger from now on // Write out PgeMetadata. dumpMetadataIfRequested(); // Setup the PGE. createExeDir(); createOuputDirsIfRequested(); updateStatus(CONF_FILE_BUILD.getWorkflowStatusName()); createDynamicConfigFiles(); updateStatus(STAGING_INPUT.getWorkflowStatusName()); stageFiles(); // Run the PGE. runPge(); // Ingest products. runIngestCrawler(createProductCrawler()); // Commit dynamic metadata. updateDynamicMetadata(); } catch (Exception e) { logger.log(Level.SEVERE, "PGETask FAILED!!! : " + e.getMessage(), e); throw new WorkflowTaskInstanceException("PGETask FAILED!!! : " + e.getMessage(), e); } } protected void updateStatus(String status) throws PGEException, XmlRpcException, IOException { logger.info("Updating status to workflow as [" + status + "]"); if (!wm.updateWorkflowInstanceStatus(workflowInstId, status)) { throw new PGEException("Failed to update workflow status : client returned false"); } } protected Logger createLogger() throws IOException, PGEException { File logDir = new File(pgeConfig.getExeDir(), "logs"); if (!(logDir.exists() || logDir.mkdirs())) { throw new PGEException("mkdirs for logs directory return false"); } Logger logger = Logger.getLogger(PGETaskInstance.class.getName() + "." + workflowInstId); FileHandler handler = new FileHandler(new File(logDir, createLogFileName()).getAbsolutePath()); handler.setEncoding("UTF-8"); handler.setFormatter(new SimpleFormatter()); logger.addHandler(handler); return logger; } protected String createLogFileName() { String filenamePattern = pgeMetadata.getMetadata(LOG_FILENAME_PATTERN); if (filenamePattern != null) { return filenamePattern; } else { return pgeMetadata.getMetadata(NAME) + "." + System.currentTimeMillis() + ".log"; } } protected PgeMetadata createPgeMetadata(Metadata dynMetadata, WorkflowTaskConfiguration config) { logger.info("Converting workflow configuration to static metadata..."); Metadata staticMetadata = new Metadata(); for (Object objKey : config.getProperties().keySet()) { String key = (String) objKey; PgeTaskMetKeys metKey = PgeTaskMetKeys.getByName(key); if (metKey != null && metKey.isVector()) { List<String> values = Lists.newArrayList( Splitter.on(",").trimResults().omitEmptyStrings().split(config.getProperty(key))); logger.finest("Adding static metadata: key = [" + key + "] value = " + values); staticMetadata.addMetadata(key, values); } else { String value = config.getProperty(key); logger.finest("Adding static metadata: key = [" + key + "] value = [" + value + "]"); staticMetadata.addMetadata(key, value); } } logger.info("Loading workflow context metadata..."); for (String key : dynMetadata.getAllKeys()) { logger.finest( "Adding dynamic metadata: key = [" + key + "] value = " + dynMetadata.getAllMetadata(key)); } return new PgeMetadata(staticMetadata, dynMetadata); } protected PgeConfig createPgeConfig() throws Exception { logger.info("Create PgeConfig..."); String pgeConfigBuilderClass = pgeMetadata.getMetadata(PGE_CONFIG_BUILDER); if (pgeConfigBuilderClass != null) { logger.info("Using PgeConfigBuilder: " + pgeConfigBuilderClass); return createPgeConfigBuilder(pgeConfigBuilderClass, logger).build(pgeMetadata); } else { logger.info("Using default PgeConfigBuilder: " + XmlFilePgeConfigBuilder.class.getCanonicalName()); return new XmlFilePgeConfigBuilder().build(pgeMetadata); } } protected void runPropertyAdders() throws PGEException { try { logger.info("Loading/Running property adders..."); List<String> propertyAdders = pgeMetadata.getAllMetadata(PROPERTY_ADDERS); if (propertyAdders != null) { for (String propertyAdder : propertyAdders) { runPropertyAdder(loadPropertyAdder(propertyAdder)); } } else { logger.info("No property adders specified"); } } catch (Exception e) { throw new PGEException("Failed to instanciate/run Property Adders : " + e.getMessage(), e); } } protected ConfigFilePropertyAdder loadPropertyAdder(String propertyAdderClasspath) { logger.fine("Loading property adder: " + propertyAdderClasspath); return createConfigFilePropertyAdder(propertyAdderClasspath, logger); } protected void runPropertyAdder(ConfigFilePropertyAdder propAdder) { logger.info("Running property adder: " + propAdder.getClass().getCanonicalName()); propAdder.addConfigProperties(pgeMetadata, pgeConfig.getPropertyAdderCustomArgs()); } protected XmlRpcWorkflowManagerClient createWorkflowManagerClient() throws MalformedURLException { String url = pgeMetadata.getMetadata(WORKFLOW_MANAGER_URL); logger.info("Creating WorkflowManager client for url [" + url + "]"); Validate.notNull(url, "Must specify " + WORKFLOW_MANAGER_URL); return new XmlRpcWorkflowManagerClient(new URL(url)); } protected String getWorkflowInstanceId() { String instanceId = pgeMetadata.getMetadata(CoreMetKeys.WORKFLOW_INST_ID); logger.info("Workflow instanceId is [" + instanceId + "]"); Validate.notNull(instanceId, "Must specify " + CoreMetKeys.WORKFLOW_INST_ID); return instanceId; } protected void dumpMetadataIfRequested() throws IOException { if (Boolean.parseBoolean(pgeMetadata.getMetadata(DUMP_METADATA))) { new SerializableMetadata(pgeMetadata.asMetadata()) .writeMetadataToXmlStream(new FileOutputStream(getDumpMetadataPath())); } } protected String getDumpMetadataPath() { return new File(pgeConfig.getExeDir()).getAbsolutePath() + "/" + getDumpMetadataName(); } protected String getDumpMetadataName() { return "pgetask-metadata.xml"; } protected void createExeDir() throws PGEException { logger.info("Creating PGE execution working directory: [" + pgeConfig.getExeDir() + "]"); File executionDir = new File(pgeConfig.getExeDir()); if (!(executionDir.exists() || executionDir.mkdirs())) { throw new PGEException("mkdirs returned false for creating [" + pgeConfig.getExeDir() + "]"); } } protected void createOuputDirsIfRequested() throws PGEException { for (OutputDir outputDir : pgeConfig.getOuputDirs()) { if (outputDir.isCreateBeforeExe()) { logger.info("Creating PGE file ouput directory: [" + outputDir.getPath() + "]"); File dir = new File(outputDir.getPath()); if (!(dir.exists() || dir.mkdirs())) { throw new PGEException("mkdir returned false for creating [" + outputDir.getPath() + "]"); } } } } protected void stageFiles() throws PGEException, IOException, ConnectionException, CatalogException, URISyntaxException, DataTransferException, InstantiationException { if (pgeConfig.getFileStagingInfo() != null) { FileStager fileStager = getFileStager(); logger.info("Starting file staging..."); fileStager.stageFiles(pgeConfig.getFileStagingInfo(), pgeMetadata, logger); } else { logger.info("No files to stage."); } } protected FileStager getFileStager() { String fileStagerClass = pgeMetadata.getMetadata(FILE_STAGER); if (fileStagerClass != null) { logger.info("Loading FileStager [" + fileStagerClass + "]"); return createFileStager(fileStagerClass, logger); } else { logger.info("Using default FileStager [" + FileManagerFileStager.class.getCanonicalName() + "]"); return new FileManagerFileStager(); } } protected void createDynamicConfigFiles() throws IOException, PGEException { logger.info("Starting creation of sci pge config files..."); for (DynamicConfigFile dynamicConfigFile : pgeConfig.getDynamicConfigFiles()) { createDynamicConfigFile(dynamicConfigFile); } logger.info("Successfully wrote all sci pge config files!"); } protected void createDynamicConfigFile(DynamicConfigFile dynamicConfigFile) throws PGEException, IOException { Validate.notNull(dynamicConfigFile, "dynamicConfigFile cannot be null"); logger.fine("Starting creation of sci pge config file [" + dynamicConfigFile.getFilePath() + "]..."); // Create parent directory if it doesn't exist. File parentDir = new File(dynamicConfigFile.getFilePath()).getParentFile(); if (!(parentDir.exists() || parentDir.mkdirs())) { throw new PGEException("Failed to create directory where sci pge config file [" + dynamicConfigFile.getFilePath() + "] was to be written"); } // Load writer and write file. logger.fine("Loading writer class for sci pge config file [" + dynamicConfigFile.getFilePath() + "]..."); SciPgeConfigFileWriter writer = createSciPgeConfigFileWriter(dynamicConfigFile.getWriterClass(), logger); logger.fine("Loaded writer [" + writer.getClass().getCanonicalName() + "] for sci pge config file [" + dynamicConfigFile.getFilePath() + "]..."); logger.info("Writing sci pge config file [" + dynamicConfigFile.getFilePath() + "]..."); File configFile = writer.createConfigFile(dynamicConfigFile.getFilePath(), pgeMetadata.asMetadata(), dynamicConfigFile.getArgs()); if (!configFile.exists()) { throw new PGEException( "Writer failed to create config file [" + configFile + "], exists returned false"); } } protected ScriptFile buildPgeRunScript() { logger.fine("Creating PGE run script for shell [" + pgeConfig.getShellType() + "] with contents " + pgeConfig.getExeCmds()); ScriptFile sf = new ScriptFile(pgeConfig.getShellType()); sf.setCommands(pgeConfig.getExeCmds()); return sf; } protected File getScriptPath() { File script = new File(pgeConfig.getExeDir(), getPgeScriptName()); logger.fine("Script file with be written to [" + script + "]"); return script; } protected String getPgeScriptName() { String pgeScriptName = "sciPgeExeScript_" + pgeMetadata.getMetadata(NAME); logger.fine("Generated script file name [" + pgeScriptName + "]"); return pgeScriptName; } protected void runPge() throws PGEException, XmlRpcException { ScriptFile sf = null; try { long startTime = System.currentTimeMillis(); logger.info("PGE start time [" + new Date(startTime) + "]"); // create script to run sf = buildPgeRunScript(); sf.writeScriptFile(getScriptPath().getAbsolutePath()); // run script and evaluate whether success or failure updateStatus(RUNNING_PGE.getWorkflowStatusName()); logger.info("Starting execution of PGE..."); if (!wasPgeSuccessful(ExecUtils.callProgram(pgeConfig.getShellType() + " " + getScriptPath(), logger, new File(pgeConfig.getExeDir()).getAbsoluteFile()))) { throw new RuntimeException("Pge didn't finish successfully"); } else { logger.info("Successfully completed running: '" + sf.getCommands() + "'"); } long endTime = System.currentTimeMillis(); logger.info("PGE end time [" + new Date(startTime) + "]"); long runTime = endTime - startTime; logger.info("PGE runtime in millis [" + runTime + "]"); pgeMetadata.replaceMetadata(PGE_RUNTIME, Long.toString(runTime)); } catch (WorkflowException e) { throw new PGEException( "Exception when executing PGE commands '" + (sf.getCommands()) + "' : " + e.getMessage(), e); } catch (IOException e) { throw new PGEException( "Exception when executing PGE commands '" + (sf.getCommands()) + "' : " + e.getMessage(), e); } } protected boolean wasPgeSuccessful(int returnCode) { return returnCode == 0; } protected void processOutput() throws IOException { for (final OutputDir outputDir : this.pgeConfig.getOuputDirs()) { File[] createdFiles = new File(outputDir.getPath()).listFiles(); if (createdFiles != null) { for (File createdFile : createdFiles) { Metadata outputMetadata = new Metadata(); for (RegExprOutputFiles regExprFiles : outputDir.getRegExprOutputFiles()) { if (Pattern.matches(regExprFiles.getRegExp(), createdFile.getName())) { try { PcsMetFileWriter writer = (PcsMetFileWriter) Class .forName(regExprFiles.getConverterClass()).newInstance(); outputMetadata.replaceMetadata( this.getMetadataForFile((regExprFiles.getRenamingConv() != null) ? createdFile = this.renameFile(createdFile, regExprFiles.getRenamingConv()) : createdFile, writer, regExprFiles.getArgs())); } catch (Exception e) { logger.severe("Failed to create metadata file for '" + createdFile + "' : " + e.getMessage()); } } } if (outputMetadata.getAllKeys().size() > 0) { this.writeFromMetadata(outputMetadata, createdFile.getAbsolutePath() + "." + this.pgeMetadata.getMetadata(MET_FILE_EXT)); } } } } } protected File renameFile(File file, PathUtilsNamingConvention renamingConv) throws NamingConventionException { Metadata curMetadata = this.pgeMetadata.asMetadata(); curMetadata.replaceMetadata(renamingConv.getTmpReplaceMet()); return renamingConv.rename(file, curMetadata); } protected Metadata getMetadataForFile(File sciPgeCreatedDataFile, PcsMetFileWriter writer, Object[] args) throws PGEException, MetExtractorConfigReaderException, ParseException, MetExtractionException, CommonsException, CasMetadataException, FileNotFoundException { return writer.getMetadataForFile(sciPgeCreatedDataFile, this.pgeMetadata, args); } protected void writeFromMetadata(Metadata metadata, String toMetFilePath) throws IOException { new SerializableMetadata(metadata, "UTF-8", false) .writeMetadataToXmlStream(new FileOutputStream(toMetFilePath)); } protected ProductCrawler createProductCrawler() throws MalformedURLException, IllegalAccessException, CrawlerActionException, MetExtractionException, InstantiationException, FileNotFoundException, ClassNotFoundException { /* create a ProductCrawler based on whether or not the output dir specifies a MIME_EXTRACTOR_REPO */ logger.info("Configuring ProductCrawler..."); ProductCrawler crawler; if (pgeMetadata.getMetadata(MIME_EXTRACTOR_REPO) != null && !pgeMetadata.getMetadata(MIME_EXTRACTOR_REPO).equals("")) { crawler = new AutoDetectProductCrawler(); ((AutoDetectProductCrawler) crawler).setMimeExtractorRepo(pgeMetadata.getMetadata(MIME_EXTRACTOR_REPO)); } else { crawler = new StdProductCrawler(); } crawler.setClientTransferer(pgeMetadata.getMetadata(INGEST_CLIENT_TRANSFER_SERVICE_FACTORY)); crawler.setFilemgrUrl(pgeMetadata.getMetadata(INGEST_FILE_MANAGER_URL)); String crawlerConfigFile = pgeMetadata.getMetadata(CRAWLER_CONFIG_FILE); if (!Strings.isNullOrEmpty(crawlerConfigFile)) { crawler.setApplicationContext(new FileSystemXmlApplicationContext(crawlerConfigFile)); List<String> actionIds = pgeMetadata.getAllMetadata(ACTION_IDS); if (actionIds != null) { crawler.setActionIds(actionIds); } } crawler.setRequiredMetadata(pgeMetadata.getAllMetadata(REQUIRED_METADATA)); crawler.setCrawlForDirs(Boolean.parseBoolean(pgeMetadata.getMetadata(CRAWLER_CRAWL_FOR_DIRS))); crawler.setNoRecur(!Boolean.parseBoolean(pgeMetadata.getMetadata(CRAWLER_RECUR))); logger.fine("Passing Workflow Metadata to CAS-Crawler as global metadata . . ."); crawler.setGlobalMetadata(pgeMetadata.asMetadata(PgeMetadata.Type.DYNAMIC)); logger.fine("Created ProductCrawler [" + crawler.getClass().getCanonicalName() + "]"); return crawler; } protected void runIngestCrawler(ProductCrawler crawler) throws PGEException, IOException, XmlRpcException { // Determine if we need to create Metadata files if (crawler instanceof StdProductCrawler) { this.processOutput(); } // Determine directories to crawl. List<File> crawlDirs = new LinkedList<File>(); for (OutputDir outputDir : pgeConfig.getOuputDirs()) { crawlDirs.add(new File(outputDir.getPath())); } // Start crawlin... updateStatus(CRAWLING.getWorkflowStatusName()); boolean attemptIngestAll = Boolean.parseBoolean(pgeMetadata.getMetadata(ATTEMPT_INGEST_ALL)); for (File crawlDir : crawlDirs) { logger.info("Crawling for products in [" + crawlDir + "]"); crawler.crawl(crawlDir); if (!attemptIngestAll) { verifyIngests(crawler); } } if (attemptIngestAll) { verifyIngests(crawler); } } protected void verifyIngests(ProductCrawler crawler) throws PGEException { logger.info("Verifying ingests successful..."); boolean ingestsSuccess = true; String exceptionMsg = ""; for (IngestStatus status : crawler.getIngestStatus()) { if (status.getResult().equals(IngestStatus.Result.FAILURE)) { exceptionMsg += (exceptionMsg.equals("") ? "" : " : ") + "Failed to ingest product [file='" + status.getProduct().getAbsolutePath() + "',result='" + status.getResult() + "',msg='" + status.getMessage() + "']"; ingestsSuccess = false; } else if (!status.getResult().equals(IngestStatus.Result.SUCCESS)) { logger.warning("Product was not ingested [file='" + status.getProduct().getAbsolutePath() + "',result='" + status.getResult() + "',msg='" + status.getMessage() + "']"); } } if (!ingestsSuccess) { throw new PGEException(exceptionMsg); } else { logger.info("Ingests were successful"); } } protected void updateDynamicMetadata() throws XmlRpcException, IOException { pgeMetadata.commitMarkedDynamicMetadataKeys(); wm.updateMetadataForWorkflow(workflowInstId, pgeMetadata.asMetadata(PgeMetadata.Type.DYNAMIC)); } }