// Java tutorial
/*******************************************************************************
 * In the Hi-WAY project we propose a novel approach of executing scientific
 * workflows processing Big Data, as found in NGS applications, on distributed
 * computational infrastructures. The Hi-WAY software stack comprises the func-
 * tional workflow language Cuneiform as well as the Hi-WAY ApplicationMaster
 * for Apache Hadoop 2.x (YARN).
 *
 * List of Contributors:
 *
 * Jürgen Brandt (HU Berlin)
 * Marc Bux (HU Berlin)
 * Ulf Leser (HU Berlin)
 *
 * Jürgen Brandt is funded by the European Commission through the BiobankCloud
 * project. Marc Bux is funded by the Deutsche Forschungsgemeinschaft through
 * research training group SOAMED (GRK 1651).
 *
 * Copyright 2014 Humboldt-Universität zu Berlin
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
******************************************************************************/

package de.huberlin.wbi.cuneiform.core.invoc;

import java.nio.file.Path;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;

import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

import de.huberlin.wbi.cuneiform.core.semanticmodel.CompoundExpr;
import de.huberlin.wbi.cuneiform.core.semanticmodel.DataType;
import de.huberlin.wbi.cuneiform.core.semanticmodel.ForeignLambdaExpr;
import de.huberlin.wbi.cuneiform.core.semanticmodel.JsonReportEntry;
import de.huberlin.wbi.cuneiform.core.semanticmodel.NameExpr;
import de.huberlin.wbi.cuneiform.core.semanticmodel.NotBoundException;
import de.huberlin.wbi.cuneiform.core.semanticmodel.NotDerivableException;
import de.huberlin.wbi.cuneiform.core.semanticmodel.Param;
import de.huberlin.wbi.cuneiform.core.semanticmodel.Prototype;
import de.huberlin.wbi.cuneiform.core.semanticmodel.ReduceVar;
import de.huberlin.wbi.cuneiform.core.semanticmodel.CfSemanticModelVisitor;
import de.huberlin.wbi.cuneiform.core.semanticmodel.StringExpr;
import de.huberlin.wbi.cuneiform.core.semanticmodel.Ticket;
import de.huberlin.wbi.cuneiform.core.semanticmodel.Type;

/**
 * Base class for turning a {@link Ticket} into an executable script in a
 * foreign language.
 *
 * <p>The script skeleton is assembled by {@link #toScript()}; the
 * language-specific syntax (variable definitions, loops, quoting, ...) is
 * supplied by subclasses through the abstract methods declared here. Concrete
 * instances are obtained through {@link #createInvocation(Ticket)}.
 */
public abstract class Invocation {

	/** Library path per language label, filled via {@link #putLibPath(String, String)}. */
	private static final Map<String, String> libPathMap = new HashMap<>();

	protected static final String FUN_LOG = "cflogmsg";
	protected static final String FUN_LOGFILE = "cflogfilemsg";
	protected static final String FUN_NORMALIZE = "cfnormalize";

	public static final String REPORT_FILENAME = "__report__.txt";
	public static final String SCRIPT_NAME = "cfscript";
	public static final String SUCCESS_FILENAME = "__success__";
	public static final String STDOUT_FILENAME = "__stdout__.txt";
	public static final String STDERR_FILENAME = "__stderr__.txt";
	public static final String LOCK_FILENAME = "__lock__";

	/** Library path handed to the constructor; may be {@code null} (see {@link #hasLibPath()}). */
	protected final String libPath;

	private final Ticket ticket;

	/**
	 * Creates an invocation for the given ticket.
	 *
	 * @param ticket  the ticket to execute; must not be {@code null}
	 * @param libPath the library path for the target language, or {@code null}
	 * @throws NullPointerException if {@code ticket} is {@code null}
	 */
	public Invocation(Ticket ticket, String libPath) {
		if (ticket == null) {
			throw new NullPointerException("Ticket must not be null.");
		}
		this.ticket = ticket;
		this.libPath = libPath;
	}

	/**
	 * Evaluates every entry of a report via {@link #evalReport(JsonReportEntry)}.
	 *
	 * @param report the report entries; must not be {@code null}
	 * @throws JSONException        if an entry's JSON payload cannot be read
	 * @throws NullPointerException if {@code report} is {@code null}
	 */
	public void evalReport(Set<JsonReportEntry> report) throws JSONException {
		if (report == null) {
			throw new NullPointerException("Report entry set must not be null.");
		}
		for (JsonReportEntry entry : report) {
			evalReport(entry);
		}
	}

	/**
	 * Evaluates a single report entry. Entries whose key is not
	 * {@link JsonReportEntry#KEY_INVOC_OUTPUT} are ignored. For an output entry,
	 * the JSON array stored under each output name of the ticket is converted
	 * into a {@link CompoundExpr} of {@link StringExpr}s and set on the ticket.
	 *
	 * @param entry the report entry; must not be {@code null}
	 * @throws JSONException        if the expected JSON structure is not present
	 * @throws NullPointerException if {@code entry} is {@code null}
	 */
	public void evalReport(JsonReportEntry entry) throws JSONException {
		if (entry == null) {
			throw new NullPointerException("Report entry must not be null.");
		}
		if (!entry.getKey().equals(JsonReportEntry.KEY_INVOC_OUTPUT)) {
			return;
		}

		JSONObject obj = entry.getValueJsonObj();
		for (NameExpr nameExpr : ticket.getOutputList()) {
			JSONArray array = obj.getJSONArray(nameExpr.getId());
			CompoundExpr ce = new CompoundExpr();
			int n = array.length();
			for (int i = 0; i < n; i++) {
				ce.addSingleExpr(new StringExpr(array.getString(i)));
			}
			ticket.setValue(nameExpr, ce);
		}
	}

	/**
	 * @return the command line used to run the script: {@code ./cfscript}
	 */
	@SuppressWarnings("static-method")
	public String[] getCmd() {
		return new String[] { "./" + SCRIPT_NAME };
	}

	/**
	 * @param location the directory the script resides in
	 * @return {@code location} resolved against {@link #SCRIPT_NAME}
	 */
	@SuppressWarnings("static-method")
	public Path getExecutablePath(Path location) {
		return location.resolve(SCRIPT_NAME);
	}

	/** @return the executable log entry provided by the ticket */
	public JsonReportEntry getExecutableLogEntry() {
		return ticket.getExecutableLogEntry();
	}

	/**
	 * @return a report entry with key {@link JsonReportEntry#KEY_INVOC_SCRIPT}
	 *         whose value is the generated script
	 * @throws NotBoundException     propagated from {@link #toScript()}
	 * @throws NotDerivableException propagated from {@link #toScript()}
	 */
	public JsonReportEntry getScriptLogEntry() throws NotBoundException, NotDerivableException {
		return new JsonReportEntry(ticket, JsonReportEntry.KEY_INVOC_SCRIPT, toScript());
	}

	/**
	 * @return the concatenated definitions of the log, normalize and log-file
	 *         helper functions, in that order
	 * @throws NotDerivableException propagated from the subclass definitions
	 */
	public String getFunDef() throws NotDerivableException {
		return defFunctionLog() + defFunctionNormalize() + defFunctionLogFile();
	}

	/** @return the language label of the ticket */
	public String getLangLabel() {
		return ticket.getLangLabel();
	}

	/**
	 * Collects the normalized values bound to all file-typed parameters.
	 *
	 * @return the set of values to stage in
	 * @throws NotDerivableException propagated from normalizing an expression
	 * @throws RuntimeException      if a name of the ticket is not bound; the
	 *                               original {@link NotBoundException} is
	 *                               attached as cause
	 */
	public Set<String> getStageInList() throws NotDerivableException {
		try {
			Set<String> set = new HashSet<>();
			for (NameExpr nameExpr : ticket.getNameSet()) {
				if (isParamStage(nameExpr.getId())) {
					CompoundExpr ce = ticket.getExpr(nameExpr);
					for (String s : ce.normalize()) {
						set.add(s);
					}
				}
			}
			return set;
		} catch (NotBoundException e) {
			// keep the message callers saw before, but preserve the cause
			throw new RuntimeException(e.getMessage(), e);
		}
	}

	/**
	 * Collects the normalized output values of all file-typed outputs.
	 *
	 * @return the set of values to stage out
	 * @throws NotDerivableException propagated from normalizing an expression
	 * @throws RuntimeException      if an output of the ticket is not bound;
	 *                               the original {@link NotBoundException} is
	 *                               attached as cause
	 */
	public Set<String> getStageOutList() throws NotDerivableException {
		try {
			Set<String> set = new HashSet<>();
			for (NameExpr nameExpr : ticket.getOutputList()) {
				if (isOutputStage(nameExpr.getId())) {
					CompoundExpr ce = ticket.getOutputValue(nameExpr);
					for (String s : ce.normalize()) {
						set.add(s);
					}
				}
			}
			return set;
		} catch (NotBoundException e) {
			// keep the message callers saw before, but preserve the cause
			throw new RuntimeException(e.getMessage(), e);
		}
	}

	/** @return the run id of the ticket */
	public UUID getRunId() {
		return ticket.getRunId();
	}

	/** @return the lambda id of the ticket, used as task id */
	public long getTaskId() {
		return ticket.getLambdaId();
	}

	/** @return the task name of the ticket */
	public String getTaskName() {
		return ticket.getTaskName();
	}

	/** @return the ticket this invocation was created for */
	public Ticket getTicket() {
		return ticket;
	}

	/** @return the id of the ticket */
	public long getTicketId() {
		return ticket.getTicketId();
	}

	/** @return whether the ticket has a task name */
	public boolean hasTaskName() {
		return ticket.hasTaskName();
	}

	/** @return {@code true} if a library path was supplied on construction */
	public boolean hasLibPath() {
		return libPath != null;
	}

	/**
	 * Assembles the complete script for this invocation. The sections are
	 * emitted in a fixed order: shebang, library path, imports, helper function
	 * definitions, default bindings for single outputs, input parameter
	 * bindings, stage-in checks, the ticket's body, post check, output
	 * renaming, output collection and stage-out collection.
	 *
	 * @return the script source
	 * @throws NotBoundException     if a parameter of the ticket is not bound
	 * @throws NotDerivableException if a bound expression cannot be normalized
	 */
	public String toScript() throws NotBoundException, NotDerivableException {
		StringBuilder buf = new StringBuilder();

		// insert shebang
		buf.append(getShebang()).append('\n');

		// modify library path
		buf.append(comment("modify library path"));
		if (hasLibPath()) {
			buf.append(getLibPath());
		}

		// import libraries
		buf.append(comment("import libraries"));
		buf.append(getImport()).append('\n');

		// define necessary functions
		buf.append(comment("define necessary functions"));
		buf.append(getFunDef()).append('\n');

		// bind single output variables to default values
		buf.append(comment("bind single output variables to default values"));
		for (String outputName : getSingleOutputNameSet()) {
			buf.append(varDef(outputName, quote(outputName)));
		}
		buf.append('\n');

		// bind input parameters
		buf.append(comment("bind input parameters"));
		for (String paramName : getSingleParamNameSet()) {
			// the task parameter itself is not bound as a variable
			if (paramName.equals(CfSemanticModelVisitor.LABEL_TASK)) {
				continue;
			}
			buf.append(varDef(paramName, quote(getResolveableBoundToSingleParam(paramName))));
		}
		for (String paramName : getReduceParamNameSet()) {
			buf.append(varDef(paramName, getReduceParam(paramName)));
		}
		buf.append('\n');

		// report stage in file sizes and report error when something is missing
		buf.append(comment("report stage in file sizes and report error when something is missing"));
		buf.append(getStageInCollect()).append('\n');

		// insert function body
		buf.append(comment("insert function body"));
		buf.append(ticket.getBody()).append('\n');

		// check post
		buf.append(comment("check post"));
		buf.append(getCheckPost()).append('\n');

		// rename output files
		buf.append(comment("rename output files"));
		buf.append(getOutputRename()).append('\n');

		// collect output variables
		buf.append(comment("collect output variables"));
		buf.append(getOutputCollect()).append('\n');

		// collect stage out information
		buf.append(comment("collect stage out information"));
		buf.append(getStageOutCollect()).append('\n');

		return buf.toString();
	}

	/**
	 * @return code inserted after the function body; empty by default
	 */
	@SuppressWarnings("static-method")
	protected String getCheckPost() {
		return "";
	}

	/**
	 * @return library import statements; empty by default
	 */
	@SuppressWarnings("static-method")
	protected String getImport() {
		return "";
	}

	protected abstract String callFunction(String name, String... argValue);

	protected abstract String callProcedure(String name, String... argValue);

	/**
	 * Removes the first character of a string.
	 *
	 * @param varName The name of the variable that holds the input string.
	 * @return A statement in the foreign language.
	 */
	protected abstract String clip(String varName);

	protected abstract String comment(String comment);

	protected abstract String copyArray(String from, String to);

	protected abstract String defFunctionLog() throws NotDerivableException;

	protected abstract String defFunctionLogFile() throws NotDerivableException;

	protected abstract String defFunctionNormalize() throws NotDerivableException;

	protected abstract String dereference(String varName);

	protected abstract String fileSize(String filename);

	protected abstract String forEach(String listName, String elementName, String body);

	protected abstract String getShebang();

	protected abstract String getLibPath();

	protected abstract String ifListIsNotEmpty(String listName, String body);

	protected abstract String ifNotFileExists(String fileName, String body);

	protected abstract String join(String... elementList);

	protected abstract String listAppend(String listName, String element);

	protected abstract String listToBraceCommaSeparatedString(String listName, String stringName, String open,
			String close);

	protected abstract String newList(String listName);

	protected abstract String quote(String content);

	protected abstract String raise(String msg);

	protected abstract String symlink(String src, String dest);

	protected abstract String varDef(String varname, String value);

	protected abstract String varDef(String varname, CompoundExpr ce) throws NotDerivableException;

	/**
	 * Tells whether the named output of the prototype is declared with the file
	 * data type.
	 *
	 * @param outputName the id of the output
	 * @return {@code true} iff the output has a {@link DataType} whose id equals
	 *         {@link CfSemanticModelVisitor#LABEL_FILE}
	 * @throws RuntimeException if the prototype has no output with that id
	 */
	private boolean isOutputStage(String outputName) {
		Prototype prototype = ticket.getPrototype();
		for (NameExpr nameExpr : prototype.getOutputList()) {
			if (nameExpr.getId().equals(outputName)) {
				return isFileTyped(nameExpr);
			}
		}
		throw new RuntimeException("Output not found.");
	}

	/**
	 * Tells whether the named parameter of the prototype is declared with the
	 * file data type.
	 *
	 * @param paramName the id of the parameter
	 * @return {@code true} iff the parameter has a {@link DataType} whose id
	 *         equals {@link CfSemanticModelVisitor#LABEL_FILE}
	 * @throws RuntimeException if the prototype has no parameter with that id
	 */
	private boolean isParamStage(String paramName) {
		Prototype prototype = ticket.getPrototype();
		for (NameExpr nameExpr : prototype.getParamNameSet()) {
			if (nameExpr.getId().equals(paramName)) {
				return isFileTyped(nameExpr);
			}
		}
		throw new RuntimeException("Parameter not found.");
	}

	/** Checks that a name carries a {@link DataType} with the file label. */
	private static boolean isFileTyped(NameExpr nameExpr) {
		if (!nameExpr.hasType()) {
			return false;
		}
		Type type = nameExpr.getType();
		if (!(type instanceof DataType)) {
			return false;
		}
		return ((DataType) type).getId().equals(CfSemanticModelVisitor.LABEL_FILE);
	}

	/**
	 * @param outputName the id of the output
	 * @return the 1-based position of the output in the prototype
	 * @throws RuntimeException if the prototype has no output with that id
	 */
	private int getOutputChannel(String outputName) {
		Prototype prototype = ticket.getPrototype();
		int n = prototype.getNumOutput();
		for (int i = 0; i < n; i++) {
			if (prototype.getOutput(i).getId().equals(outputName)) {
				return i + 1;
			}
		}
		throw new RuntimeException("Output not found.");
	}

	/**
	 * Generates the code that accumulates all output variables in the script
	 * variable {@code CFSTR} as a brace-enclosed, comma-separated list of
	 * {@code name:[...]} items and passes it to the {@link #FUN_LOG} procedure
	 * under the key {@link JsonReportEntry#KEY_INVOC_OUTPUT}.
	 *
	 * @return the generated code
	 */
	protected String getOutputCollect() {
		StringBuilder buf = new StringBuilder();

		buf.append(varDef("CFSTR", quote("")));

		// every item is appended with a leading comma; the first one is clipped below
		for (String outputName : getSingleOutputNameSet()) {
			buf.append(varDef("CFSTR", join(dereference("CFSTR"), quote(","), quote(outputName + ":[\""),
					dereference(outputName), quote("\"]"))));
		}

		for (String outputName : getReduceOutputNameSet()) {
			buf.append(varDef("CFSTR1", quote("")));
			buf.append(forEach(outputName, "CFI",
					varDef("CFSTR1", join(dereference("CFSTR1"), quote(",\""), dereference("CFI"), quote("\"")))));
			buf.append(clip("CFSTR1"));
			buf.append(varDef("CFSTR", join(dereference("CFSTR"), quote(","), quote(outputName + ":["),
					dereference("CFSTR1"), quote("]"))));
		}

		buf.append(clip("CFSTR"));
		buf.append(varDef("CFSTR", join(quote("{"), dereference("CFSTR"), quote("}"))));
		buf.append(callProcedure(FUN_LOG, quote(JsonReportEntry.KEY_INVOC_OUTPUT), dereference("CFSTR")));
		buf.append('\n');

		return buf.toString();
	}

	/**
	 * Generates the code that, for every file-typed output, computes a new file
	 * name via {@link #FUN_NORMALIZE} (given the output channel and the current
	 * name), creates a symlink between the two names and rebinds the output
	 * variable to the new name(s).
	 *
	 * @return the generated code
	 */
	protected String getOutputRename() {
		StringBuilder buf = new StringBuilder();

		for (String outputName : getSingleOutputNameSet()) {
			if (isOutputStage(outputName)) {
				buf.append(varDef("CFFILENAME", callFunction(FUN_NORMALIZE,
						String.valueOf(getOutputChannel(outputName)), dereference(outputName))));
				buf.append(symlink(dereference(outputName), dereference("CFFILENAME")));
				buf.append(varDef(outputName, dereference("CFFILENAME")));
			}
		}

		for (String outputName : getReduceOutputNameSet()) {
			if (isOutputStage(outputName)) {
				buf.append(newList("CFLIST"));
				buf.append(forEach(outputName, "CFFILENAME",
						varDef("CFNEWFILENAME",
								callFunction(FUN_NORMALIZE, String.valueOf(getOutputChannel(outputName)),
										dereference("CFFILENAME")))
								+ listAppend("CFLIST", dereference("CFNEWFILENAME"))
								+ symlink(dereference("CFFILENAME"), dereference("CFNEWFILENAME"))));
				buf.append(copyArray("CFLIST", outputName));
			}
		}

		return buf.toString();
	}

	/** @return the ids of all ticket outputs that are {@link ReduceVar}s */
	private Set<String> getReduceOutputNameSet() {
		Set<String> reduceList = new HashSet<>();
		for (NameExpr nameExpr : ticket.getOutputList()) {
			if (nameExpr instanceof ReduceVar) {
				reduceList.add(nameExpr.getId());
			}
		}
		return reduceList;
	}

	/** @return the ids of all ticket parameters that are {@link ReduceVar}s */
	protected Set<String> getReduceParamNameSet() {
		Set<String> reduceList = new HashSet<>();
		for (Param param : ticket.getParamSet()) {
			if (param instanceof ReduceVar) {
				reduceList.add(((ReduceVar) param).getId());
			}
		}
		return reduceList;
	}

	/**
	 * @param paramName the id of the parameter
	 * @return the expression the ticket binds to that parameter
	 * @throws NotBoundException if the parameter is not bound
	 */
	protected CompoundExpr getReduceParam(String paramName) throws NotBoundException {
		return ticket.getExpr(paramName);
	}

	/**
	 * @param paramName the id of the parameter
	 * @return the first element of the normalized expression bound to the
	 *         parameter
	 * @throws NotBoundException     if the parameter is not bound
	 * @throws NotDerivableException if the expression cannot be normalized
	 */
	protected String getResolveableBoundToSingleParam(String paramName)
			throws NotBoundException, NotDerivableException {
		return ticket.getExpr(paramName).normalize().get(0);
	}

	/** @return the ids of all ticket outputs that are not {@link ReduceVar}s */
	protected Set<String> getSingleOutputNameSet() {
		Set<String> nonReduceList = new HashSet<>();
		for (NameExpr nameExpr : ticket.getOutputList()) {
			if (!(nameExpr instanceof ReduceVar)) {
				nonReduceList.add(nameExpr.getId());
			}
		}
		return nonReduceList;
	}

	/**
	 * @return the ids of all names contained in ticket parameters that are not
	 *         {@link ReduceVar}s
	 */
	protected Set<String> getSingleParamNameSet() {
		Set<String> nonReduceList = new HashSet<>();
		for (Param param : ticket.getParamSet()) {
			if (!(param instanceof ReduceVar)) {
				for (NameExpr nameExpr : param.getNameExprSet()) {
					nonReduceList.add(nameExpr.getId());
				}
			}
		}
		return nonReduceList;
	}

	/**
	 * Generates the code that, for every file-typed input, raises an error if
	 * the file does not exist and otherwise reports its size under the key
	 * {@link JsonReportEntry#KEY_FILE_SIZE_STAGEIN} via {@link #FUN_LOGFILE}.
	 *
	 * @return the generated code
	 */
	protected String getStageInCollect() {
		StringBuilder buf = new StringBuilder();

		for (String inputName : getSingleParamNameSet()) {
			if (isParamStage(inputName)) {
				buf.append(ifNotFileExists(dereference(inputName),
						raise(join(quote("Stage in: A file "), dereference(inputName),
								quote(" should be present but has not been found.")))));
				buf.append(varDef("SIZE", fileSize(dereference(inputName))));
				buf.append(callProcedure(FUN_LOGFILE, dereference(inputName),
						quote(JsonReportEntry.KEY_FILE_SIZE_STAGEIN), dereference("SIZE")));
				buf.append('\n');
			}
		}

		// NOTE(review): this branch emits the log call via callFunction while
		// the single-value branch above uses callProcedure - confirm intended.
		for (String inputName : getReduceParamNameSet()) {
			if (isParamStage(inputName)) {
				buf.append(forEach(inputName, "CFI",
						ifNotFileExists(dereference("CFI"),
								raise(join(quote("Stage in: A file "), dereference("CFI"),
										quote(" should be present but has not been found."))))
								+ varDef("SIZE", fileSize(dereference("CFI")))
								+ callFunction(FUN_LOGFILE, dereference("CFI"),
										quote(JsonReportEntry.KEY_FILE_SIZE_STAGEIN), dereference("SIZE"))));
			}
		}

		return buf.toString();
	}

	/**
	 * Generates the code that, for every file-typed output, raises an error if
	 * the file does not exist and otherwise reports its size under the key
	 * {@link JsonReportEntry#KEY_FILE_SIZE_STAGEOUT} via {@link #FUN_LOGFILE}.
	 *
	 * @return the generated code
	 */
	protected String getStageOutCollect() {
		StringBuilder buf = new StringBuilder();

		for (String outputName : getSingleOutputNameSet()) {
			if (isOutputStage(outputName)) {
				buf.append(ifNotFileExists(dereference(outputName),
						raise(join(quote("Stage out: A file "), dereference(outputName),
								quote(" should have been created but has not been found.")))));
				buf.append(varDef("SIZE", fileSize(dereference(outputName))));
				buf.append(callProcedure(FUN_LOGFILE, dereference(outputName),
						quote(JsonReportEntry.KEY_FILE_SIZE_STAGEOUT), dereference("SIZE")));
				buf.append('\n');
			}
		}

		// NOTE(review): this branch emits the log call via callFunction while
		// the single-value branch above uses callProcedure, and its error text
		// differs from the single-value one - confirm both are intended.
		for (String outputName : getReduceOutputNameSet()) {
			if (isOutputStage(outputName)) {
				buf.append(forEach(outputName, "CFI",
						ifNotFileExists(dereference("CFI"),
								raise(join(quote("Stage out: A file "), dereference("CFI"),
										quote(" should be present but has not been found."))))
								+ varDef("SIZE", fileSize(dereference("CFI")))
								+ callFunction(FUN_LOGFILE, dereference("CFI"),
										quote(JsonReportEntry.KEY_FILE_SIZE_STAGEOUT), dereference("SIZE"))));
			}
		}

		return buf.toString();
	}

	/**
	 * Creates the invocation matching the language label of the ticket. The
	 * library path registered for that label via
	 * {@link #putLibPath(String, String)} (or {@code null} if none) is passed
	 * on to the invocation.
	 *
	 * @param ticket the ticket to create an invocation for
	 * @return a language-specific invocation
	 * @throws RuntimeException if the language label is not recognized
	 */
	public static Invocation createInvocation(Ticket ticket) {
		String label = ticket.getLangLabel();
		String libPath = libPathMap.get(label);

		switch (label) {
		case ForeignLambdaExpr.LANGID_BASH:
			return new BashInvocation(ticket, libPath);
		case ForeignLambdaExpr.LANGID_R:
			return new RInvocation(ticket, libPath);
		case ForeignLambdaExpr.LANGID_PERL:
			return new PerlInvocation(ticket, libPath);
		case ForeignLambdaExpr.LANGID_MATLAB:
			return new MatlabInvocation(ticket, libPath);
		case ForeignLambdaExpr.LANGID_OCTAVE:
			return new OctaveInvocation(ticket, libPath);
		case ForeignLambdaExpr.LANGID_SCALA:
			return new ScalaInvocation(ticket, libPath);
		case ForeignLambdaExpr.LANGID_JAVA:
			// NOTE(review): the Java label is served by ScalaInvocation - confirm
			// this is intended and not a copy-paste slip.
			return new ScalaInvocation(ticket, libPath);
		case ForeignLambdaExpr.LANGID_PYTHON:
			return new PythonInvocation(ticket, libPath);
		case ForeignLambdaExpr.LANGID_LISP:
			return new LispInvocation(ticket, libPath);
		case ForeignLambdaExpr.LANGID_ERLANG:
			return new ErlangInvocation(ticket, libPath);
		case ForeignLambdaExpr.LANGID_HASKELL:
			return new HaskellInvocation(ticket, libPath);
		case ForeignLambdaExpr.LANGID_PEGASUS:
			return new PegasusInvocation(ticket, libPath);
		default:
			throw new RuntimeException("Language label '" + label + "' not recognized.");
		}
	}

	/**
	 * Registers the library path to use for a language.
	 *
	 * @param langId  the language id; must not be {@code null}
	 * @param libPath the library path to associate with the language
	 * @throws NullPointerException if {@code langId} is {@code null}
	 */
	public static void putLibPath(String langId, String libPath) {
		if (langId == null) {
			throw new NullPointerException("Language id must not be null.");
		}
		libPathMap.put(langId, libPath);
	}

	/** Creates a report entry for this invocation referring to a file. */
	public JsonReportEntry createJsonReportEntry(String file, String key, String value) {
		return new JsonReportEntry(getRunId(), getTaskId(), getTaskName(), getLangLabel(), getTicketId(), file,
				key, value);
	}

	/** Creates a timestamped report entry for this invocation referring to a file. */
	public JsonReportEntry createJsonReportEntry(long timestamp, String file, String key, String value) {
		return new JsonReportEntry(timestamp, getRunId(), getTaskId(), getTaskName(), getLangLabel(),
				getTicketId(), file, key, value);
	}

	/** Creates a timestamped report entry with a JSON value, referring to a file. */
	public JsonReportEntry createJsonReportEntry(long timestamp, String file, String key, JSONObject value) {
		return new JsonReportEntry(timestamp, getRunId(), getTaskId(), getTaskName(), getLangLabel(),
				getTicketId(), file, key, value);
	}

	/** Creates a report entry for this invocation with a {@code null} file. */
	public JsonReportEntry createJsonReportEntry(String key, String value) {
		return new JsonReportEntry(getRunId(), getTaskId(), getTaskName(), getLangLabel(), getTicketId(), null,
				key, value);
	}

	/** Creates a timestamped report entry for this invocation with a {@code null} file. */
	public JsonReportEntry createJsonReportEntry(long timestamp, String key, String value) {
		return new JsonReportEntry(timestamp, getRunId(), getTaskId(), getTaskName(), getLangLabel(),
				getTicketId(), null, key, value);
	}

	/** Creates a timestamped report entry with a JSON value and a {@code null} file. */
	public JsonReportEntry createJsonReportEntry(long timestamp, String key, JSONObject value) {
		return new JsonReportEntry(timestamp, getRunId(), getTaskId(), getTaskName(), getLangLabel(),
				getTicketId(), null, key, value);
	}
}