// Java tutorial
/*******************************************************************************
 * In the Hi-WAY project we propose a novel approach of executing scientific
 * workflows processing Big Data, as found in NGS applications, on distributed
 * computational infrastructures. The Hi-WAY software stack comprises the func-
 * tional workflow language Cuneiform as well as the Hi-WAY ApplicationMaster
 * for Apache Hadoop 2.x (YARN).
 *
 * List of Contributors:
 *
 * Marc Bux (HU Berlin)
 * Jürgen Brandt (HU Berlin)
 * Hannes Schuh (HU Berlin)
 * Ulf Leser (HU Berlin)
 *
 * Jürgen Brandt is funded by the European Commission through the BiobankCloud
 * project. Marc Bux is funded by the Deutsche Forschungsgemeinschaft through
 * research training group SOAMED (GRK 1651).
 *
 * Copyright 2014 Humboldt-Universität zu Berlin
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
******************************************************************************/
package de.huberlin.wbi.hiway.logstats;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;

import org.json.JSONException;
import org.json.JSONObject;

import de.huberlin.hiwaydb.useDB.HiwayDBI;
import de.huberlin.wbi.cuneiform.core.semanticmodel.JsonReportEntry;

/**
 * Parses a log generated by Cuneiform / Hi-WAY and emits
 *
 * Overview - For all tasks combined:
 * (1) Idle Time (time in which a container could be allocated but is not)
 * (2) Scheduling time
 * (3) Container Startup / Stagein
 * (4) Container Shutdown / Stageout
 * (5) Execution time
 *
 * Contribution to Runtime: total container uptime per task
 *
 * Task Execution Characteristics: Startup vs Execution vs Shutdown
 *
 * Typical use (see {@link #main(String[])}): construct one parser per log
 * file, call {@link #firstPass()}, {@link #secondPass()} and
 * {@link #thirdPass()} in that order, then {@link #printStatistics(boolean)}.
 */
public class LogParser {

    /** Orders report entries by ascending timestamp. */
    public class JsonReportEntryComparatorByTimestamp implements Comparator<JsonReportEntry> {
        @Override
        public int compare(JsonReportEntry o1, JsonReportEntry o2) {
            return Long.compare(o1.getTimestamp(), o2.getTimestamp());
        }
    }

    /** Orders invocations by ascending execution timestamp. */
    public class OnsetTimestampComparator implements Comparator<Invocation> {
        @Override
        public int compare(Invocation o1, Invocation o2) {
            return Long.compare(o1.getExecTimestamp(), o2.getExecTimestamp());
        }
    }

    /**
     * Parses every log file named in {@code args} and prints one tab-separated
     * statistics row per file to standard output; the header row is printed
     * before the row of the first file only.
     *
     * Statistics are printed even when a pass failed (the stack trace goes to
     * standard error), so each input file always yields exactly one row.
     *
     * @param args paths of the log files to analyse
     */
    public static void main(String[] args) {
        for (int i = 0; i < args.length; i++) {
            LogParser logParser = new LogParser(args[i]);
            try {
                logParser.firstPass();
                logParser.secondPass();
                logParser.thirdPass();
            } catch (JSONException | IOException e) {
                e.printStackTrace();
            }
            logParser.printStatistics(i == 0);
        }
    }

    /** Report entries of the log; filled and sorted by timestamp in {@link #firstPass()}. */
    List<JsonReportEntry> entries;
    private final File file;
    /** Invocations seen in the log, keyed by invocation id; filled in {@link #secondPass()}. */
    private final Map<Long, Invocation> invocations;
    private final WorkfowRun run;

    /**
     * Creates a parser for a single log file. Nothing is read until
     * {@link #firstPass()} is called.
     *
     * @param fileName path of the log file to parse
     */
    public LogParser(String fileName) {
        file = new File(fileName);
        // Start with an empty list so the later passes and printStatistics()
        // do not hit a null field when firstPass() was skipped or failed early.
        entries = new LinkedList<>();
        invocations = new HashMap<>();
        run = new WorkfowRun();
    }

    /**
     * Copies invocation id, language, task id and task name from
     * {@code complete} onto {@code incomplete}.
     */
    private static void expandEntry(JsonReportEntry incomplete, JsonReportEntry complete) {
        incomplete.setInvocId(complete.getInvocId());
        incomplete.setLang(complete.getLang());
        incomplete.setTaskId(complete.getTaskId());
        incomplete.setTaskname(complete.getTaskName());
    }

    /**
     * Attaches invocation information to Hi-WAY container events by pairing
     * them with invocation entries in log order:
     * "container-requested" events are paired first-in-first-out with
     * invoc-exec entries, and each invoc-time entry is paired with the oldest
     * pending "container-completed" event and, through its container id, with
     * the matching "container-allocated" event.
     *
     * @throws JSONException if an event value lacks an expected field
     * @throws java.util.NoSuchElementException if an entry has no pending
     *         counterpart to pair with (the queues are consumed with
     *         {@code remove()})
     */
    private void expandHiwayEvents() throws JSONException {
        Queue<JsonReportEntry> pendingExecs = new LinkedList<>();
        Queue<JsonReportEntry> pendingCompletions = new LinkedList<>();
        Map<String, JsonReportEntry> allocationsByContainer = new HashMap<>();

        for (JsonReportEntry entry : entries) {
            switch (entry.getKey()) {
            case JsonReportEntry.KEY_INVOC_EXEC:
                pendingExecs.add(entry);
                break;
            case JsonReportEntry.KEY_INVOC_TIME: {
                JsonReportEntry completed = pendingCompletions.remove();
                String containerId = completed.getValueJsonObj().getString("container-id");
                JsonReportEntry allocated = allocationsByContainer.get(containerId);
                expandEntry(completed, entry);
                // The allocation event may be absent from the log; expanding a
                // missing entry would fail with a NullPointerException.
                if (allocated != null) {
                    expandEntry(allocated, entry);
                }
                break;
            }
            case HiwayDBI.KEY_HIWAY_EVENT: {
                JSONObject value = entry.getValueJsonObj();
                switch (value.getString("type")) {
                case "container-requested":
                    expandEntry(entry, pendingExecs.remove());
                    break;
                case "container-allocated":
                    allocationsByContainer.put(value.getString("container-id"), entry);
                    break;
                case "container-completed":
                    pendingCompletions.add(entry);
                    break;
                default:
                }
                break;
            }
            default:
            }
        }
    }

    /**
     * Reads the log file line by line into {@link #entries}, removes
     * duplicates and the events of failed containers, expands Hi-WAY events
     * with invocation information, sorts the entries by timestamp and writes
     * the cleaned log to the file "output" in the working directory.
     *
     * @throws IOException if the log cannot be read or "output" cannot be written
     * @throws JSONException if a line or an event value cannot be parsed
     */
    public void firstPass() throws IOException, JSONException {
        entries = new LinkedList<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Blank lines carry no entry; skip them instead of handing
                // them to the entry parser.
                if (line.trim().isEmpty()) {
                    continue;
                }
                entries.add(new JsonReportEntry(line));
            }
        }
        removeDuplicates();
        removeBadContainers();
        expandHiwayEvents();
        Collections.sort(entries, new JsonReportEntryComparatorByTimestamp());
        printToFile(new File("output"));
    }

    /**
     * Sums the lifecycle phases over {@code invocs} and renders them as
     * tab-terminated columns in the order scheduling, startup, stage-in,
     * execution, stage-out, shutdown.
     *
     * @param invocs invocations to aggregate
     * @param idleTime whether to prepend an idle-time column, computed as
     *        maxConcurrentNodes * runtime minus the no-task-ready time and
     *        all summed phases
     * @return the columns, each followed by a tab
     */
    private String lifecycle(Collection<Invocation> invocs, boolean idleTime) {
        long sched = 0;
        long startup = 0;
        long stagein = 0;
        long exec = 0;
        long stageout = 0;
        long shutdown = 0;
        for (Invocation invoc : invocs) {
            sched += invoc.getSchedTime();
            startup += invoc.getStartupTime();
            stagein += invoc.getStageinTime();
            exec += invoc.getExecTime();
            stageout += invoc.getStageoutTime();
            shutdown += invoc.getShutdownTime();
        }

        String lifecycle = sched + "\t" + startup + "\t" + stagein + "\t" + exec + "\t" + stageout + "\t"
                + shutdown + "\t";
        if (!idleTime) {
            return lifecycle;
        }
        long idle = run.getMaxConcurrentNodes() * (run.getRuntime()) - run.getNoTaskReadyTime() - sched - startup
                - stagein - exec - stageout - shutdown;
        return idle + "\t" + lifecycle;
    }

    /**
     * Builds the header cells matching the columns of
     * {@link #lifecycle(Collection, boolean)}, each prefixed with
     * {@code category}.
     */
    private static String lifecycleHeaders(String category, boolean idleTime) {
        String pre = category + " idle\t";
        String post = category + " scheduling\t" + category + " startup\t" + category + " stage-in\t" + category
                + " execution\t" + category + " stage-out\t" + category + " shutdown\t";
        return idleTime ? pre + post : post;
    }

    /** Null-tolerant string equality. */
    private static boolean sameName(String a, String b) {
        return a == null ? b == null : a.equals(b);
    }

    /**
     * Appends to the file "output" one line per pair of consecutive
     * invocations (ordered by execution timestamp) of the given task on the
     * given host: the difference in execution time, a tab, and the difference
     * in file size.
     *
     * Nothing is written when no invocation matches the task and host.
     *
     * @param printTask task name to select
     * @param printHost host name to select
     */
    public void printIncrements(String printTask, String printHost) {
        List<Invocation> invocs = new ArrayList<>();
        for (Invocation invocation : invocations.values()) {
            if (sameName(printTask, invocation.getTaskName()) && sameName(printHost, invocation.getHostName())) {
                invocs.add(invocation);
            }
        }
        if (invocs.isEmpty()) {
            // Unknown task/host combination: nothing to report.
            return;
        }
        Collections.sort(invocs, new OnsetTimestampComparator());

        try (BufferedWriter writer = new BufferedWriter(new FileWriter(new File("output"), true))) {
            for (int i = 0; i + 1 < invocs.size(); i++) {
                Invocation invocA = invocs.get(i);
                Invocation invocB = invocs.get(i + 1);
                writer.write((invocB.getExecTime() - invocA.getExecTime()) + "\t"
                        + (invocB.getFileSize() - invocA.getFileSize()) + "\n");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints one tab-separated statistics row to standard output: the run's
     * runtime, the lifecycle columns (including idle time) over all
     * invocations, then the lifecycle columns per task name in sorted order.
     *
     * @param printHeaders whether to print the matching header row first
     */
    public void printStatistics(boolean printHeaders) {
        Map<String, Set<Invocation>> invocationsByTaskname = new HashMap<>();
        for (Invocation invocation : invocations.values()) {
            String taskname = invocation.getTaskName();
            Set<Invocation> sameTask = invocationsByTaskname.get(taskname);
            if (sameTask == null) {
                sameTask = new HashSet<>();
                invocationsByTaskname.put(taskname, sameTask);
            }
            sameTask.add(invocation);
        }

        List<String> taskNames = new ArrayList<>(invocationsByTaskname.keySet());
        Collections.sort(taskNames);

        if (printHeaders) {
            System.out.print("runtime\t");
            System.out.print(lifecycleHeaders("total", true));
            for (String taskname : taskNames) {
                System.out.print(lifecycleHeaders(taskname, false));
            }
            System.out.println();
        }

        System.out.print(run.getRuntime() + "\t");
        System.out.print(lifecycle(invocations.values(), true));
        for (String taskname : taskNames) {
            System.out.print(lifecycle(invocationsByTaskname.get(taskname), false));
        }
        System.out.println();
    }

    /** Writes every entry's string form to {@code output}, one per line, replacing the file. */
    private void printToFile(File output) throws IOException {
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(output))) {
            for (JsonReportEntry entry : entries) {
                writer.write(entry.toString() + "\n");
            }
        }
    }

    /**
     * Removes the "container-completed" events that report a non-zero exit
     * code, together with the "container-allocated" events of the same
     * containers.
     *
     * @throws JSONException if an event value lacks an expected field
     */
    private void removeBadContainers() throws JSONException {
        Set<String> badContainers = new HashSet<>();
        // A hash set keeps the final removeAll() linear in the number of entries.
        Set<JsonReportEntry> badEntries = new HashSet<>();

        // First sweep: failed completions define the set of bad containers.
        for (JsonReportEntry entry : entries) {
            if (!entry.getKey().equals(HiwayDBI.KEY_HIWAY_EVENT)) {
                continue;
            }
            JSONObject value = entry.getValueJsonObj();
            if (value.getString("type").equals("container-completed") && value.getInt("exit-code") != 0) {
                badContainers.add(value.getString("container-id"));
                badEntries.add(entry);
            }
        }

        // Second sweep: an allocation may precede its completion in the log,
        // so allocations can only be matched once all bad ids are known.
        for (JsonReportEntry entry : entries) {
            if (!entry.getKey().equals(HiwayDBI.KEY_HIWAY_EVENT)) {
                continue;
            }
            JSONObject value = entry.getValueJsonObj();
            if (value.getString("type").equals("container-allocated")
                    && badContainers.contains(value.getString("container-id"))) {
                badEntries.add(entry);
            }
        }

        entries.removeAll(badEntries);
    }

    /** Drops repeated entries while keeping the first occurrence and the original order. */
    private void removeDuplicates() {
        entries = new LinkedList<>(new LinkedHashSet<>(entries));
    }

    /**
     * Walks the entries once, creating an {@link Invocation} per invocation
     * id and recording its timings, host and staged-in file sizes; also
     * records the run's onset timestamp and runtime and the maximum number of
     * containers that were allocated but not yet completed at the same time.
     *
     * Entries without an invocation id still count towards the container
     * tally but are not attributed to any invocation.
     *
     * @throws JSONException if an entry value lacks an expected field
     */
    public void secondPass() throws JSONException {
        int maxContainers = 0;
        int currentContainers = 0;

        for (JsonReportEntry entry : entries) {
            Long invocId = entry.getInvocId();
            Invocation invocation = null;
            if (invocId != null) {
                invocation = invocations.get(invocId);
                if (invocation == null) {
                    invocation = new Invocation(entry.getTaskName());
                    invocations.put(invocId, invocation);
                }
            }

            switch (entry.getKey()) {
            case HiwayDBI.KEY_HIWAY_EVENT:
                switch (entry.getValueJsonObj().getString("type")) {
                case "container-allocated":
                    if (invocation != null) {
                        invocation.setStartupTimestamp(entry.getTimestamp());
                    }
                    currentContainers++;
                    maxContainers = Math.max(currentContainers, maxContainers);
                    break;
                case "container-completed":
                    if (invocation != null) {
                        invocation.setShutdownTimestamp(entry.getTimestamp());
                    }
                    currentContainers--;
                    break;
                default:
                }
                break;
            case HiwayDBI.KEY_WF_NAME:
                run.setRunOnsetTimestamp(entry.getTimestamp());
                break;
            case HiwayDBI.KEY_WF_TIME:
                run.setRuntime(Long.parseLong(entry.getValueRawString()));
                break;
            default:
                // All remaining keys describe a single invocation.
                if (invocation != null) {
                    recordInvocationEntry(invocation, entry);
                }
            }
        }

        run.setMaxConcurrentNodes(maxContainers);
    }

    /** Stores the timing, host or file-size information of {@code entry} on {@code invocation}. */
    private static void recordInvocationEntry(Invocation invocation, JsonReportEntry entry) throws JSONException {
        switch (entry.getKey()) {
        case HiwayDBI.KEY_INVOC_TIME_SCHED:
            invocation.setSchedTime(entry.getValueJsonObj().getLong("realTime"));
            break;
        case HiwayDBI.KEY_INVOC_HOST:
            invocation.setHostName(entry.getValueRawString());
            break;
        case JsonReportEntry.KEY_INVOC_TIME:
            invocation.setExecTime(entry.getValueJsonObj().getLong("realTime"));
            invocation.setExecTimestamp(entry.getTimestamp());
            break;
        case HiwayDBI.KEY_INVOC_TIME_STAGEIN:
            invocation.setStageinTime(entry.getValueJsonObj().getLong("realTime"));
            invocation.setStageinTimestamp(entry.getTimestamp());
            break;
        case HiwayDBI.KEY_INVOC_TIME_STAGEOUT:
            invocation.setStageoutTime(entry.getValueJsonObj().getLong("realTime"));
            invocation.setStageoutTimestamp(entry.getTimestamp());
            break;
        case JsonReportEntry.KEY_FILE_SIZE_STAGEIN:
            invocation.addFileSize(Long.parseLong(entry.getValueRawString()));
            break;
        default:
        }
    }

    /**
     * Accumulates the time in which a container has got no work to do, as
     * there is currently no task ready to execute, and stores it on the run.
     *
     * The ready-task counter is raised by every invoc-exec entry and lowered
     * by every "container-completed" event; for each interval between
     * consecutive entries, the containers in excess of the ready tasks
     * (relative to the maximum concurrency found in {@link #secondPass()})
     * are charged with the interval's length.
     *
     * @throws JSONException if an event value lacks the "type" field
     */
    public void thirdPass() throws JSONException {
        long noTaskReadyTime = 0;
        long lastTimestamp = run.getRunOnsetTimestamp();
        int readyTasks = 0;

        for (JsonReportEntry entry : entries) {
            String key = entry.getKey();
            if (JsonReportEntry.KEY_INVOC_EXEC.equals(key)) {
                readyTasks++;
            } else if (HiwayDBI.KEY_HIWAY_EVENT.equals(key)
                    && "container-completed".equals(entry.getValueJsonObj().getString("type"))) {
                readyTasks--;
            }

            long timestamp = entry.getTimestamp();
            noTaskReadyTime += Math.max(0, run.getMaxConcurrentNodes() - readyTasks) * (timestamp - lastTimestamp);
            lastTimestamp = timestamp;
        }

        run.setNoTaskReadyTime(noTaskReadyTime);
    }
}