// Java tutorial
/*
 * Copyright (c) 2012 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

package org.broadinstitute.sting.queue.extensions.gatk;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.ParsingEngine;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.WalkerManager;
import org.broadinstitute.sting.gatk.filters.FilterManager;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.walkers.PartitionBy;
import org.broadinstitute.sting.gatk.walkers.PartitionType;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.Method;
import java.lang.reflect.Modifier;
import java.lang.reflect.Type;
import java.lang.reflect.TypeVariable;
import java.util.*;
import java.util.Map.Entry;

/**
 * Generates Queue modules that can be used to run GATK walkers.
 *
 * ArgumentCollections are flattened into a single module.
 */
public class GATKExtensionsGenerator extends CommandLineProgram {
    private static final Logger logger = Logger.getLogger(GATKExtensionsGenerator.class);

    /** Package of this generator; every generated scala file is declared in and written under this package. */
    public static final String GATK_EXTENSIONS_PACKAGE_NAME = GATKExtensionsGenerator.class.getPackage().getName();

    /** The platform line separator, as produced by the "%n" format specifier. */
    private static final String NEWLINE = String.format("%n");

    /*
     * CLASS_TEMPLATE and TRAIT_TEMPLATE are filled in by getContent() with, in order:
     * package, import text, class name, base class, constructor text, argument declarations,
     * freezeFieldValues override, and the command line additions.
     */
    private static final String CLASS_TEMPLATE = "package %s%n" +
            "%s%n" +
            "class %s extends %s {%n" +
            "%s%s%n" +
            "%soverride def commandLine = super.commandLine%s%n" +
            "}%n";

    private static final String TRAIT_TEMPLATE = "package %s%n" +
            "%s%n" +
            "trait %s extends %s {%n" +
            "%s%s%n" +
            "%sabstract override def commandLine = super.commandLine%s%n" +
            "}%n";

    /** Filled in by writeDependencies() with, in order: package, class name, and the list of classOf[...] entries. */
    private static final String GATK_DEPENDENCIES_TEMPLATE = "package %s%n" +
            "%n" +
            "/** A dynamicly generated list of classes that the GATK Extensions depend on, but are not be detected by default by BCEL. */%n" +
            "class %s {%n" +
            "val types = Seq(%n%s)%n" +
            "}%n";

    @Output(fullName = "output_directory", shortName = "outDir", doc = "Directory to output the generated scala", required = true)
    public File outputDirectory;

    /** Enumerates the CommandLineProgram plugins that wrappers are generated for. */
    PluginManager<CommandLineProgram> clpManager = new PluginManager<CommandLineProgram>(CommandLineProgram.class, "CommandLineProgram", "CLP");

    /** Engine instance handed to the argument type descriptors. */
    GenomeAnalysisEngine GATKEngine = new GenomeAnalysisEngine();

    /** Enumerates the walkers that are wrapped as subclasses of the CommandLineGATK wrapper. */
    WalkerManager walkerManager = new WalkerManager();

    /** Enumerates the read filters that are wrapped as scala traits. */
    FilterManager filterManager = new FilterManager();

    /**
     * Required main method implementation.
     * @param argv Command-line arguments.
     */
    public static void main(String[] argv) {
        try {
            start(new GATKExtensionsGenerator(), argv);
            System.exit(CommandLineProgram.result);
        } catch (Exception e) {
            exitSystemWithError(e);
        }
    }

    /**
     * Supplies the VCF writer, SAM file writer and output stream argument type descriptors,
     * each bound to this generator's engine and to System.out.
     * @return The argument type descriptors, in the order listed above.
     */
    @Override
    protected Collection<ArgumentTypeDescriptor> getArgumentTypeDescriptors() {
        List<ArgumentTypeDescriptor> typeDescriptors = new ArrayList<ArgumentTypeDescriptor>();
        typeDescriptors.add(new VCFWriterArgumentTypeDescriptor(GATKEngine, System.out, Collections.<Object>emptyList()));
        typeDescriptors.add(new SAMFileWriterArgumentTypeDescriptor(GATKEngine, System.out));
        typeDescriptors.add(new OutputStreamArgumentTypeDescriptor(GATKEngine, System.out));
        return typeDescriptors;
    }

    /**
     * Loops over all the walkers and filters and generates a scala class for each.
     * @return zero if the run was successful, non-zero if there was an error.
     */
    @Override
    protected int execute() {
        try {
            if (!outputDirectory.isDirectory() && !outputDirectory.mkdirs()) {
                throw new ReviewedStingException("Unable to create output directory: " + outputDirectory);
            }

            SortedSet<Class<?>> dependents = new TreeSet<Class<?>>(classComparator);

            for (Class<? extends CommandLineProgram> clp : clpManager.getPlugins()) {
                try {
                    if (!isGatkProgram(clp)) {
                        logger.debug("Skipping: " + clp);
                        continue;
                    }

                    logger.debug("Generating: " + clp);
                    String clpClassName = clpManager.getName(clp);
                    String clpConstructor = String.format("analysisName = \"%s\"%njavaMainClass = \"%s\"%n",
                            clpClassName, clp.getName());

                    writeClass("org.broadinstitute.sting.queue.function.JavaCommandLineFunction", clpClassName,
                            false, clpConstructor, ArgumentDefinitionField.getArgumentFields(parser, clp), dependents);

                    // Walkers are generated as subclasses of the CommandLineGATK wrapper written just above.
                    if (clp == CommandLineGATK.class) {
                        writeWalkerClasses(clpClassName, dependents);
                    }
                } catch (Exception e) {
                    // Any failure, including an IOException from writing, is rethrown unchecked with the program named.
                    throw new ReviewedStingException("Error generating wrappers for " + clp, e);
                }
            }

            for (Class<? extends ReadFilter> filter : filterManager.getValues()) {
                String filterName = filterManager.getName(filter);
                writeFilter(filterName, ArgumentDefinitionField.getArgumentFields(new ParsingEngine(null), filter), dependents);
            }

            writeDependencies(dependents);

            return 0;
        } catch (IOException exception) {
            logger.error("Error generating queue output.", exception);
            return 1;
        }
    }

    /**
     * Generates one scala wrapper class per walker known to the walker manager.
     * @param clpClassName Name of the generated command line program class that each walker wrapper extends.
     * @param dependents A set that should be updated with explicit dependencies that need to be packaged.
     * @throws ReviewedStingException If the wrapper for any walker cannot be generated.
     */
    private void writeWalkerClasses(String clpClassName, Set<Class<?>> dependents) {
        for (Entry<String, Collection<Class<? extends Walker>>> walkersByPackage :
                walkerManager.getWalkerNamesByPackage(false).entrySet()) {
            for (Class<? extends Walker> walkerType : walkersByPackage.getValue()) {
                try {
                    String walkerName = walkerManager.getName(walkerType);

                    List<ArgumentField> argumentFields = new ArrayList<ArgumentField>();
                    argumentFields.addAll(ArgumentDefinitionField.getArgumentFields(parser, walkerType));
                    //argumentFields.addAll(RodBindField.getRodArguments(walkerType, trackBuilder));
                    argumentFields.addAll(ReadFilterField.getFilterArguments(parser, walkerType));

                    String constructor = String.format("analysisName = \"%1$s\"%nanalysis_type = \"%1$s\"%n", walkerName);

                    // A null scatter class means the walker is not scatter/gatherable.
                    String scatterClass = getScatterClass(walkerType);
                    boolean isScatter = scatterClass != null;
                    if (isScatter) {
                        constructor += String.format("scatterClass = classOf[%s]%n", scatterClass);
                    }

                    writeClass(GATK_EXTENSIONS_PACKAGE_NAME + "." + clpClassName, walkerName,
                            isScatter, constructor, argumentFields, dependents);
                } catch (Exception e) {
                    throw new ReviewedStingException("Error generating wrappers for walker " + walkerType, e);
                }
            }
        }
    }

    /**
     * The list of packages to search through.
     */
    private static final List<String> gatkPackages = Arrays.asList(
            "org.broadinstitute.sting.tools",
            "org.broadinstitute.sting.gatk",
            "org.broadinstitute.sting.pipeline",
            "org.broadinstitute.sting.gatk.datasources.reads.utilities");

    /**
     * Returns true if the class is part of the GATK.
     * @param clazz Class to check.
     * @return True if the class is part of the GATK.
     */
    private boolean isGatkProgram(Class<?> clazz) {
        if (clazz.getPackage() == null) {
            return false;
        }
        String classPackage = clazz.getPackage().getName();
        for (String gatkPackage : gatkPackages) {
            if (classPackage.startsWith(gatkPackage)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the scatter type for a walker.
     * @param walkerType The walker to check.
     * @return The name of the scatter function class for the walker, or null if it is partitioned by NONE.
     * @throws ReviewedStingException If the walker does not carry a PartitionBy annotation.
     */
    private String getScatterClass(Class<? extends Walker> walkerType) {
        PartitionBy partitionBy = walkerType.getAnnotation(PartitionBy.class);
        if (partitionBy == null) {
            // Fail with the walker named rather than with a bare NullPointerException.
            throw new ReviewedStingException("Walker " + walkerType.getName() + " is missing the @PartitionBy annotation");
        }
        PartitionType partitionType = partitionBy.value();
        if (partitionType == PartitionType.NONE) {
            return null;
        }
        // Locale.ROOT keeps the generated class name independent of the default locale's case rules.
        return StringUtils.capitalize(partitionType.name().toLowerCase(Locale.ROOT)) + "ScatterFunction";
    }

    /**
     * Writes a dynamically generated scala wrapper for a class.
     * @param baseClass The class to extend from.
     * @param className The class name to generate.
     * @param isScatter True if the class is scatter/gatherable.
     * @param constructor Additional logic for the constructor, or an empty string.
     * @param argumentFields The list of argument fields for the generated class.
     * @param dependents A set that should be updated with explicit dependencies that need to be packaged.
     * @throws IOException If the file cannot be written.
     */
    private void writeClass(String baseClass, String className, boolean isScatter, String constructor,
                            List<? extends ArgumentField> argumentFields, Set<Class<?>> dependents) throws IOException {
        String content = getContent(CLASS_TEMPLATE, baseClass, className, constructor, isScatter, "", argumentFields, dependents);
        writeFile(GATK_EXTENSIONS_PACKAGE_NAME + "." + className, content);
    }

    /**
     * Writes a dynamically generated scala wrapper for a GATK filter.
     * The filter is defined as a trait in scala, and can be mixed into a GATK command via "val myMixin = new PrintReads with FilterName"
     * @param className The class name to generate.
     * @param argumentFields The list of argument fields for the generated class.
     * @param dependents A set that should be updated with explicit dependencies that need to be packaged.
     * @throws IOException If the file cannot be written.
     */
    private void writeFilter(String className, List<? extends ArgumentField> argumentFields,
                             Set<Class<?>> dependents) throws IOException {
        String content = getContent(TRAIT_TEMPLATE, "org.broadinstitute.sting.queue.function.CommandLineFunction",
                className, "", false, String.format(" + required(\"--read_filter\", \"%s\")", className),
                argumentFields, dependents);
        writeFile(GATK_EXTENSIONS_PACKAGE_NAME + "." + className, content);
    }

    /**
     * Writes the dependents to a scala wrapper that will compile and get picked up by BCEL.
     * BCEL was missing some classes, such as Enums, when they were defined in the other generated classes.
     * This generated wrapper makes sure they are explicitly seen by BCEL.
     * @param dependents Explicit dependencies that need to be packaged.
     * @throws IOException If the file cannot be written.
     */
    private void writeDependencies(SortedSet<Class<?>> dependents) throws IOException {
        // Include the enclosing classes too. Scala will be looking for them.
        SortedSet<Class<?>> withEnclosing = new TreeSet<Class<?>>(classComparator);
        for (Class<?> dependent : dependents) {
            for (Class<?> enclosing = dependent; enclosing != null; enclosing = enclosing.getEnclosingClass()) {
                withEnclosing.add(enclosing);
            }
        }

        // Oh, and include the classes defined on methods too!
        // NOTE(review): this pass starts from an empty set, so only types collected from method
        // signatures end up in the output; confirm that dropping the remaining dependents is intended.
        SortedSet<Class<?>> signatureTypes = new TreeSet<Class<?>>(classComparator);
        for (Class<?> dependent : withEnclosing) {
            for (Method method : dependent.getDeclaredMethods()) {
                JVMUtils.addGenericTypes(signatureTypes, method.getGenericReturnType());
                for (Type parameterType : method.getGenericParameterTypes()) {
                    JVMUtils.addGenericTypes(signatureTypes, parameterType);
                }
                for (Type exceptionType : method.getGenericExceptionTypes()) {
                    JVMUtils.addGenericTypes(signatureTypes, exceptionType);
                }
            }
        }

        // Generate the dependents.
        String className = "GATKClassDependencies";
        StringBuilder classes = new StringBuilder();

        for (Class<?> dependent : signatureTypes) {
            // Arrays, built-in types and non-public classes are left out of the generated list.
            if (dependent.isArray()) {
                continue;
            }
            if (ArgumentField.isBuiltIn(dependent)) {
                continue;
            }
            if (!Modifier.isPublic(dependent.getModifiers())) {
                continue;
            }
            if (classes.length() > 0) {
                classes.append(",").append(NEWLINE);
            }
            String typeParams = getScalaTypeParams(dependent);
            // Nested classes are referenced with '.' in place of the JVM's '$' separator.
            classes.append("classOf[").append(dependent.getName().replace("$", ".")).append(typeParams).append("]");
        }
        String content = String.format(GATK_DEPENDENCIES_TEMPLATE, GATK_EXTENSIONS_PACKAGE_NAME, className, classes);
        writeFile(GATK_EXTENSIONS_PACKAGE_NAME + "." + className, content);
    }

    /**
     * Returns a string representing the type parameters for a class, or an empty string if there are no type parameters.
     * Each type parameter is rendered as the scala wildcard "_", e.g. "[_,_]" for a class with two type parameters.
     * @param clazz The class to look for type parameters.
     * @return The type parameters or an empty string.
     */
    private String getScalaTypeParams(Class<?> clazz) {
        TypeVariable<?>[] typeParams = clazz.getTypeParameters();
        if (typeParams.length == 0) {
            return "";
        }
        return "[" + StringUtils.repeat("_", ",", typeParams.length) + "]";
    }

    /**
     * Writes the generated scala file with this content.
     * The file is left untouched when it already exists with identical content.
     * @param fullClassName Generated class name.
     * @param content scala content.
     * @throws IOException If the file cannot be written.
     */
    private void writeFile(String fullClassName, String content) throws IOException {
        File outputFile = new File(outputDirectory, fullClassName.replace(".", "/") + ".scala");
        if (outputFile.exists()) {
            String existingContent = FileUtils.readFileToString(outputFile);
            if (StringUtils.equals(content, existingContent)) {
                return;
            }
        }
        FileUtils.writeStringToFile(outputFile, content);
    }

    /**
     * Generates scala content using CLASS_TEMPLATE or TRAIT_TEMPLATE.
     * @param scalaTemplate CLASS_TEMPLATE or TRAIT_TEMPLATE
     * @param baseClass The class to extend from.
     * @param className The class name to generate.
     * @param constructor Additional logic for the constructor, or an empty string.
     * @param isScatter True if the class is scatter/gatherable.
     * @param commandLinePrefix Additional logic to prefix to the QCommandLine.commandLine, or an empty string.
     * @param argumentFields The list of argument fields for the generated class.
     * @param dependents A set that should be updated with explicit dependencies that need to be packaged.
     * @return The populated template.
     */
    private static String getContent(String scalaTemplate, String baseClass, String className, String constructor,
                                     boolean isScatter, String commandLinePrefix,
                                     List<? extends ArgumentField> argumentFields, Set<Class<?>> dependents) {
        StringBuilder arguments = new StringBuilder();
        StringBuilder commandLine = new StringBuilder(commandLinePrefix);
        StringBuilder freezeFieldOverride = new StringBuilder();

        Set<String> importSet = new HashSet<String>();
        boolean isGather = false;
        for (ArgumentField argumentField : argumentFields) {
            arguments.append(argumentField.getArgumentAddition());
            commandLine.append(argumentField.getCommandLineAddition());
            importSet.addAll(argumentField.getImportStatements());
            freezeFieldOverride.append(argumentField.getFreezeFields());
            dependents.addAll(argumentField.getDependentClasses());

            isGather |= argumentField.isGather();
        }

        if (isScatter) {
            importSet.add("import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction");
            baseClass += " with ScatterGatherableFunction";
        }
        if (isGather) {
            importSet.add("import org.broadinstitute.sting.commandline.Gather");
        }

        // Sort the imports so that they are always in the same order.
        List<String> sortedImports = new ArrayList<String>(importSet);
        Collections.sort(sortedImports);

        // Only emit the freezeFieldValues override when at least one field contributed to it.
        if (freezeFieldOverride.length() > 0) {
            freezeFieldOverride.insert(0, String.format("override def freezeFieldValues() {%nsuper.freezeFieldValues()%n"));
            freezeFieldOverride.append(String.format("}%n%n"));
        }

        String importText = sortedImports.isEmpty() ? "" : NEWLINE + StringUtils.join(sortedImports, NEWLINE) + NEWLINE;

        // see CLASS_TEMPLATE and TRAIT_TEMPLATE above
        return String.format(scalaTemplate, GATK_EXTENSIONS_PACKAGE_NAME, importText,
                className, baseClass, constructor, arguments, freezeFieldOverride, commandLine);
    }

    /** Orders classes by name, treating a null class as having an empty name. */
    private static final Comparator<Class<?>> classComparator = new Comparator<Class<?>>() {
        @Override
        public int compare(Class<?> a, Class<?> b) {
            return (a == null ? "" : a.getName()).compareTo(b == null ? "" : b.getName());
        }
    };
}