Java tutorial
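The class below, io.ecarf.core.utils.LogParser, reads the coordinator and processor log files produced by an ECARF run, either from a local directory or from a Google Cloud Storage bucket (a gs:// path), extracts the task timings, dictionary statistics and BigQuery import/export figures, and prints them as CSV.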
/**
 * The contents of this file may be used under the terms of the Apache License, Version 2.0
 * in which case, the provisions of the Apache License Version 2.0 are applicable instead of those above.
 *
 * Copyright 2014, Ecarf.io
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.ecarf.core.utils;

import io.cloudex.cloud.impl.google.compute.GoogleMetaData;
import io.cloudex.framework.cloud.entities.StorageObject;
import io.ecarf.core.cloud.impl.google.EcarfGoogleCloudServiceImpl;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.TimeUnit;

import org.apache.commons.lang.StringUtils;

import com.google.common.collect.Lists;

/**
 * Parses coordinator & processor log files, must end with .log
 *
 * @author Omer Dawelbeit (omerio)
 */
public class LogParser {

    private static final Map<String, TimeUnit> UNITS = new HashMap<>();
    static {
        UNITS.put("ns", TimeUnit.NANOSECONDS);
        UNITS.put("\u03bcs", TimeUnit.MICROSECONDS);
        UNITS.put("ms", TimeUnit.MILLISECONDS);
        UNITS.put("s", TimeUnit.SECONDS);
        UNITS.put("min", TimeUnit.MINUTES);
        UNITS.put("h", TimeUnit.HOURS);
        UNITS.put("d", TimeUnit.DAYS);
    }

    private static final String PROCESSOR = "processor";
    private static final String COORDINATOR = "coordinator";
    private static final String TIMER = " TIMER# Task io.ecarf.core.cloud.task.";
    private static final String COMPLETED_IN = " completed in";
    private static final String DUMMY_TASK = "processor.DummyProcessorTask";
    private static final String EXTRACT_TASK = "processor.analyze.ExtractCountTerms2PartTask";
    private static final String ASSEMBLE_DICT_TASK = "processor.dictionary.AssembleDictionaryTask";
    private static final String PROCESS_LOAD_TASK = "processor.ProcessLoadTask";
    private static final String LOAD_TASK = "coordinator.LoadBigDataFilesTask";
    private static final String DO_REASON_TASK = "processor.reason.phase2.DoReasonTask";
    private static final String DO_REASON_TASK1 = "processor.reason.phase3.DoReasonTask";
    private static final String REASON_TASK_SUMMARY = " [main] reason.phase2.DoReasonTask";
    private static final String ELAPSED_JOB = "framework.components.Coordinator - TIMER# Job elapsed time:";
    private static final String BIGQUERY_SAVE = "[main] impl.google.GoogleCloudServiceImpl - BigQuery query data saved successfully, timer:";
    private static final String BIGQUERY_JOB_ELAPSED = "[main] impl.google.GoogleCloudServiceImpl - Job Status: DONE, elapsed time (secs): ";
    private static final String ASSEMBLE_DICTIONARY_SUBTASK = "processor.dictionary.AssembleDictionarySubTask";
    private static final String TERM_DICT_CON = "term.dictionary.TermDictionaryConcurrent";
    private static final String TERM_DICT = "term.dictionary.TermDictionary - TIMER#";
    private static final String JSON_NUM_VM = "\"numberOfProcessors\":";
    private static final String JSON_VM_TYPE = "\"vmType\":";
    private static final String FILE_ITEMS = "processor.files.ProcessFilesTask - Processing files: ";
    private static final String DICT_DOWNLOAD = "task.processor.ProcessLoadTask - Loading the dictionary from file: ";
    private static final String DICT_LOAD = "task.processor.ProcessLoadTask - Dictionary loaded successfully, memory usage: ";
    private static final String BIGQUERY_ROWS = " rows from BigQuery for jobId: ";
    private static final String DOWNLOADING = "[main] impl.google.GoogleCloudServiceImpl - Downloading ";
    private static final String INSERTING = "[main] reason.phase2.DoReasonTask9 - Inserting ";
    private static final String INSERTING8 = "[main] reason.phase2.DoReasonTask8 - Inserting ";
    private static final String BIGQUERY_INF = ", inferred triples into Big Data table for ";

    private EcarfGoogleCloudServiceImpl service;

    private Set<String> files = new HashSet<>();

    //private List<Double> jobElapsedTimes = new ArrayList<>();

    private List<CoordinatorStats> coordinators = new ArrayList<>();

    private List<ProcessorStats> processors = new ArrayList<>();

    private List<DictionaryStats> dictionaries = new ArrayList<>();

    private List<Rows> bigQueryExport = new ArrayList<>();

    private List<Rows> bigQueryImport = new ArrayList<>();

    /**
     * @throws Exception
     */
    public LogParser(String folder) throws Exception {
        super();

        boolean remote = folder.startsWith(GoogleMetaData.CLOUD_STORAGE_PREFIX);

        if (remote) {
            String bucket = StringUtils.remove(folder, GoogleMetaData.CLOUD_STORAGE_PREFIX);
            this.setUp();

            List<StorageObject> objects = this.service.listCloudStorageObjects(bucket);

            for (StorageObject object : objects) {
                String name = object.getName();

                if (name.endsWith(Constants.DOT_LOG)) {
                    String localFile = FilenameUtils.getLocalFilePath(name);
                    service.downloadObjectFromCloudStorage(name, localFile, bucket);
                    this.files.add(localFile);
                }
            }

        } else {
            // local directory, only accept coordinator & processor files that end with .log
            DirectoryStream.Filter<Path> filter = new DirectoryStream.Filter<Path>() {
                public boolean accept(Path file) throws IOException {
                    String filename = file.toString();
                    return filename.endsWith(Constants.DOT_LOG)
                            && (filename.contains(PROCESSOR) || filename.contains(COORDINATOR));
                }
            };

            Path dir = Paths.get(folder);

            try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir, filter)) {
                for (Path path : stream) {
                    this.files.add(path.toString());
                }
            }
        }
    }

    public void setUp() throws Exception {
        this.service = new EcarfGoogleCloudServiceImpl();
        TestUtils.prepare(service);
    }

    /**
     * Process the timer on tasks, e.g.:
     * TIMER# Task io.ecarf.core.cloud.task.processor.DummyProcessorTask completed in 1.808 min
     *
     * @param stats
     * @param line
     */
    private void parseTaskTimer(Stats stats, String line, boolean coordinator) {

        double timer = this.extractAndGetTimer(line, COMPLETED_IN);

        if (coordinator && line.indexOf(TIMER + DUMMY_TASK + COMPLETED_IN) > -1) {
            ((CoordinatorStats) stats).evmAcquis = timer;

        } else if (coordinator && line.indexOf(TIMER + LOAD_TASK) > -1) {
            ((CoordinatorStats) stats).bigQueryLoad = timer;

        } else if (line.indexOf(TIMER + EXTRACT_TASK) > -1) {
            stats.extractCountTerms = timer;

        } else if (line.indexOf(TIMER + ASSEMBLE_DICT_TASK) > -1) {
            stats.assembleDictionary = timer;

        } else if (line.indexOf(TIMER + PROCESS_LOAD_TASK) > -1) {
            stats.processLoad = timer;

        } else if (line.indexOf(TIMER + DO_REASON_TASK) > -1 || line.indexOf(TIMER + DO_REASON_TASK1) > -1) {
            stats.reasonPhase = timer;
        }
    }

    private static final String R_INFERRED = "Finished reasoning, total inferred triples = ";
    private static final String R_ROWS = "Total rows retrieved from big data = ";
    private static final String R_GBYTES = "Total processed GBytes = ";
    private static final String R_SERIAL_IN_FILE = "Total process reasoning time (serialization in inf file) = ";
    private static final String R_EMPTY_CYCLE = "Total time spent in empty inference cycles = ";

    /**
     * Extracts the reasoning summary printed at the end of a reasoning task, e.g.:
     *
     * 03/03/2016 09:53:42 [ INFO] [main] reason.phase2.DoReasonTask9 - Finished reasoning, total inferred triples = 19535679
     * 03/03/2016 09:53:42 [ INFO] [main] reason.phase2.DoReasonTask9 - Total rows retrieved from big data = 30457343
     * 03/03/2016 09:53:42 [ INFO] [main] reason.phase2.DoReasonTask9 - Total processed GBytes = 10.054763808846474
     * 03/03/2016 09:53:42 [ INFO] [main] reason.phase2.DoReasonTask9 - Total process reasoning time (serialization in inf file) = 1.017 min
     * 03/03/2016 09:53:42 [ INFO] [main] reason.phase2.DoReasonTask9 - Total time spent in empty inference cycles = 1.508 min
     */
    private void extractProcessorReasoningStats(String line, ProcessorStats stats) {

        if (line.indexOf(R_INFERRED) > -1) {
            stats.inferred = Integer.parseInt(StringUtils.substringAfter(line, R_INFERRED));

        } else if (line.indexOf(R_ROWS) > -1) {
            stats.bigQueryRows = Integer.parseInt(StringUtils.substringAfter(line, R_ROWS));

        } else if (line.indexOf(R_GBYTES) > -1) {
            stats.bigQueryProcBytes = Double.parseDouble(StringUtils.substringAfter(line, R_GBYTES));

        } else if (line.indexOf(R_SERIAL_IN_FILE) > -1) {
            stats.serialInFile = this.extractAndGetTimer(line, R_SERIAL_IN_FILE);

        } else if (line.indexOf(R_EMPTY_CYCLE) > -1) {
            stats.retries = this.extractAndGetTimer(line, R_EMPTY_CYCLE);
        }
    }

    private double extractAndGetTimer(String line, String after) {
        return this.extractAndGetTimer(line, after, false);
    }

    private double extractAndGetTimer(String line, String after, boolean ignoreMillis) {
        String timer = StringUtils.substringAfter(line, after);
        timer = StringUtils.remove(timer, ':');
        timer = StringUtils.trim(timer);
        return this.parseStopwatchTime(timer, ignoreMillis);
    }

    /**
     * Extracts the memory usage and the number of dictionary items from lines such as:
     *
     * - Processing file: /tmp/wordnet_links.nt.gz.kryo.gz, dictionary items: 49382611, memory usage: 14.336268931627274GB, timer: 290.0 ms
     * /tmp/wikipedia_links_en.nt.gz.kryo.gz, dictionary items: 44, memory usage: 0.013648882508277893GB, timer: 2.636 s
     * START: Downloading file: interlanguage_links_chapters_en.nt.gz.kryo.gz, memory usage: 0.0GB
     *
     * @param line
     * @return a two-element array holding the memory usage (GB) and the number of dictionary items
     */
    private double[] extractAndGetMemoryDictionaryItems(String line) {

        double memory = 0;
        double items = 0;
        String memoryStr = null;

        if (line.contains(TIMER_PREFIX)) {
            memoryStr = StringUtils.substringBetween(line, MEM_USE, TIMER_PREFIX);

            if (line.contains(DIC_ITEMS)) {
                String itemsStr = StringUtils.trim(StringUtils.substringBetween(line, DIC_ITEMS, MEM_USE));
                items = Double.parseDouble(itemsStr);
            }

        } else {
            memoryStr = StringUtils.substringAfter(line, MEM_USE);
        }

        // only parse the memory when it was actually found on the line
        if (memoryStr != null) {
            memoryStr = StringUtils.remove(memoryStr, "GB");
            memoryStr = StringUtils.strip(memoryStr);
            memory = Double.parseDouble(memoryStr);
        }

        return new double[] { memory, items };
    }

    /**
     * Extracts the dictionary size from lines such as:
     * processor.dictionary.AssembleDictionarySubTask - Dictionary size: 44817045
     *
     * @param line
     * @return
     */
    private int extractDictionarySize(String line) {
        return Integer.parseInt(StringUtils.substringAfter(line, DIC_SIZE));
    }

    private static final String TIMER_PREFIX = ", timer:";
    private static final String MEM_USE = ", memory usage:";
    private static final String DIC_SIZE = "Dictionary size: ";
    private static final String DIC_ITEMS = "dictionary items: ";
    private static final String DIC_ASSEMBLE = "Successfully assembled dictionary with size: ";
    private static final String MAX_RES_ID = ", max resourceId: ";
    private static final String NON_CON_TIMER = "#TIMER finished creating non concurrent dictionary";
    private static final String TERM_DIC_TIMER = "TIMER# serialized dictionary to file:";
    //private static final String SERIAL_DICT = "Successfully serialized dictionary with size: ";
    private static final String SCHEMA_TERMS = "Schema terms added to the dictionary, final size: ";
    private static final String TERM_PARTS = " term parts ";
    private static final String PROCESSING = "- Processing: ";

    /**
     * Extracts the dictionary statistics
     *
     * @param dStats
     * @param line
     */
    private void extractDictionaryStats(DictionaryStats dStats, String line) {

        if (line.contains(MEM_USE)) {
            double[] values = this.extractAndGetMemoryDictionaryItems(line);
            double memory = values[0];
            dStats.memoryFootprint.add(memory);

            if (values[1] > 0) {
                dStats.memoryUsage.add(new MemUsage((int) values[1], memory));
            }

        } else if (line.contains(DIC_SIZE)) {
            int size = this.extractDictionarySize(line);
            dStats.memoryUsage.add(new MemUsage(size, dStats.getLatestMemoryUsage()));
        }

        if (line.contains(DIC_ASSEMBLE)) {
            //Successfully assembled dictionary with size: 53550116, max resourceId: 54291281, memory usage: 14.449545934796333GB, timer: 5.010 min
            dStats.items = Integer.parseInt(StringUtils.substringBetween(line, DIC_ASSEMBLE, MAX_RES_ID));
            dStats.maxResourceId = Integer.parseInt(StringUtils.substringBetween(line, MAX_RES_ID, MEM_USE));
            dStats.assemble = this.extractAndGetTimer(line, TIMER_PREFIX);
        }

        /*
         * term.dictionary.TermDictionaryConcurrent - Creating non concurrent dictionary, memory usage: 11.166048146784306
         * term.dictionary.TermDictionaryConcurrent - #TIMER finished creating non concurrent dictionary, memory usage: 13.966991074383259GB, timer: 1.577 min
         * processor.dictionary.AssembleDictionaryTask - Successfully created non concurrent dictionary for serialization, memory usage: 13.966991074383259GB, timer: 6.992 min
         * core.utils.Utils - Serializing object of class: class io.ecarf.core.term.dictionary.TermDictionaryCore to file: /tmp/dbpedia_dictionary_8c.kryo.gz, with compress = true
         * term.dictionary.TermDictionary - TIMER# serialized dictionary to file: /tmp/dbpedia_dictionary_8c.kryo.gz, in: 3.274 min
         * processor.dictionary.AssembleDictionaryTask - Successfully serialized dictionary with size: 53550116, memory usage: 13.964397609233856GB, timer: 10.27 min
         */
        if (line.contains(NON_CON_TIMER)) {
            dStats.nonConcurrent = this.extractAndGetTimer(line, TIMER_PREFIX);

        } else if (line.contains(TERM_DIC_TIMER)) {
            dStats.serialize = this.extractAndGetTimer(line, " in:");

        } else if (line.contains(SCHEMA_TERMS)) {
            //processor.dictionary.AssembleDictionaryTask - Schema terms added to the dictionary, final size: 53550784 , memory usage: 14.449545934796333GB
            dStats.itemsAfterSchema = Integer
                    .parseInt(StringUtils.trim(StringUtils.substringBetween(line, SCHEMA_TERMS, MEM_USE)));
        }

        //processor.dictionary.AssembleDictionarySubTask - Processing: 1718527 term parts , memory usage: 17.80723436176777GB, timer: 4.839 s
        if (line.contains(TERM_PARTS)) {
            dStats.parts += Integer
                    .parseInt(StringUtils.trim(StringUtils.substringBetween(line, PROCESSING, TERM_PARTS)));
        }
    }

    /**
     * Parses all of the collected log files
     *
     * @throws FileNotFoundException
     * @throws IOException
     */
    private void parse() throws FileNotFoundException, IOException {

        System.out.println("Parsing log files: ");

        for (String file : files) {

            boolean coordinator = file.contains(COORDINATOR);

            Stats stats;
            Stats dStats = null;
            List<Double> bigQuerySave = null;
            List<Double> bigQueryLoad = null;
            List<Double> bigQueryQueriesElapsed = null;

            if (coordinator) {
                stats = new CoordinatorStats();
                this.coordinators.add((CoordinatorStats) stats);

            } else {
                stats = new ProcessorStats();
                this.processors.add((ProcessorStats) stats);

                bigQuerySave = new ArrayList<>();
                bigQueryLoad = new ArrayList<>();
                bigQueryQueriesElapsed = new ArrayList<>();

                dStats = new DictionaryStats();
                dStats.filename = StringUtils.substringAfterLast(file, "/");
                this.dictionaries.add((DictionaryStats) dStats);
            }

            stats.filename = StringUtils.substringAfterLast(file, "/");

            //System.out.println(file);

            String line = null;
            int rows = 0;
            int inferred = 0;

            try (BufferedReader r = new BufferedReader(new FileReader(file))) {

                do {
                    line = r.readLine();

                    if (line != null) {

                        if (line.indexOf(TIMER) > -1) {
                            this.parseTaskTimer(stats, line, coordinator);

                        } else if (line.indexOf(ELAPSED_JOB) > -1 && coordinator) {
                            ((CoordinatorStats) stats).endToEnd = this.extractAndGetTimer(line, ELAPSED_JOB);

                        } else if (line.indexOf(REASON_TASK_SUMMARY) > -1) {
                            this.extractProcessorReasoningStats(line, (ProcessorStats) stats);

                        } else if (line.indexOf(BIGQUERY_SAVE) > -1) {
                            bigQuerySave.add(this.extractAndGetTimer(line, BIGQUERY_SAVE, true));

                        } else if (line.contains(BIGQUERY_ROWS)) {
                            rows = Integer.parseInt(StringUtils.substringBetween(line, DOWNLOADING, BIGQUERY_ROWS));

                        } else if (line.indexOf(BIGQUERY_JOB_ELAPSED) > -1) {

                            double value = this.extractAndGetTimer(line, BIGQUERY_JOB_ELAPSED, true);

                            // the BigQuery job JSON is logged on the following lines, skip one
                            // line then check the job configuration to work out the job type
                            r.readLine();
                            String line1 = r.readLine();

                            if (line1 != null && line1.indexOf("\"configuration\" : {") > -1) {

                                line1 = r.readLine();

                                if (line1 != null) {

                                    if (line1.indexOf("\"load\" : {") > -1) {
                                        bigQueryLoad.add(value);

                                        if (inferred > 0) {
                                            this.bigQueryImport.add(new Rows(inferred, value));
                                            inferred = 0;
                                        }

                                    } else if (line1.indexOf("\"query\" : {") > -1) {
                                        // fast forward to this line
                                        //"recordsWritten" : "0",
                                        do {
                                            line1 = r.readLine();
                                        } while (line1 != null && !line1.contains("\"recordsWritten\" :"));

                                        if (line1 != null && !line1.contains("\"recordsWritten\" : \"0\",")) {
                                            if (value > 0) {
                                                bigQueryQueriesElapsed.add(value);
                                            }
                                        }

                                    } else if (line1.indexOf("\"extract\" : {") > -1) {

                                        if (rows > 0) {
                                            this.bigQueryExport.add(new Rows(rows, value));
                                            rows = 0;
                                        }
                                    }
                                }
                            }

                        } else if (line.indexOf(ASSEMBLE_DICTIONARY_SUBTASK) > -1 || line.contains(ASSEMBLE_DICT_TASK)
                                || line.contains(TERM_DICT_CON) || line.contains(TERM_DICT)) {
                            this.extractDictionaryStats((DictionaryStats) dStats, line);

                        } else if (coordinator) {

                            if (line.contains(JSON_NUM_VM)) {
                                //"numberOfProcessors": 8.0
                                ((CoordinatorStats) stats).numOfProcessors = (int) Double
                                        .parseDouble(StringUtils.substringAfter(line, JSON_NUM_VM + " "));

                            } else if (line.contains(JSON_VM_TYPE)) {
                                //"vmType": "n1-standard-2",
                                ((CoordinatorStats) stats).vmType = StringUtils.substringBetween(line,
                                        JSON_VM_TYPE + " \"", "\",");
                            }

                        } else if (!coordinator && line.contains(FILE_ITEMS)) {
                            // this line occurs twice per file, so only add the items if they haven't been added yet
                            if (((ProcessorStats) stats).fileItems.isEmpty()) {
                                String items = StringUtils.substringAfter(line, FILE_ITEMS);
                                //processor.files.ProcessFilesTask - Processing files: [revision_ids_en.nt.gz, revision_uris_en.nt.gz, yago_taxonomy.nt.gz, interlanguage_links_chapters_en.nt.gz, geo_coordinates_en.nt.gz]
                                List<String> fileItems = Lists
                                        .newArrayList(StringUtils.substringBetween(items, "[", "]").split(", "));
                                ((ProcessorStats) stats).fileItems.addAll(fileItems);
                            }

                        } else if (line.contains(DICT_DOWNLOAD)) {
                            //task.processor.ProcessLoadTask - Loading the dictionary from file: /tmp/dbpedia_dictionary.kryo.gz, memory usage: 1.8702433556318283GB, timer: 9.671 s
                            ((ProcessorStats) stats).dictionaryDownload = this.extractAndGetTimer(line, TIMER_PREFIX, true);
                            double[] values = this.extractAndGetMemoryDictionaryItems(line);
                            ((ProcessorStats) stats).dictionaryMemBefore = values[0];

                        } else if (line.contains(DICT_LOAD)) {
                            //task.processor.ProcessLoadTask - Dictionary loaded successfully, memory usage: 3.9780617877840996GB, timer: 1.160 min
                            ((ProcessorStats) stats).dictionaryLoad = this.extractAndGetTimer(line, TIMER_PREFIX);
                            double[] values = this.extractAndGetMemoryDictionaryItems(line);
                            ((ProcessorStats) stats).dictionaryMemAfter = values[0];
                        }

                        if (line.contains(BIGQUERY_INF)) {
                            //System.out.println(line);
                            if (line.contains("DoReasonTask8")) {
                                inferred = Integer.parseInt(StringUtils.substringBetween(line, INSERTING8, BIGQUERY_INF));

                            } else {
                                inferred = Integer.parseInt(StringUtils.substringBetween(line, INSERTING, BIGQUERY_INF));
                            }
                        }
                    }

                } while (line != null);
            }

            if (!coordinator) {
                ((ProcessorStats) stats).bigQuerySave = this.sum(bigQuerySave);
                ((ProcessorStats) stats).bigQueryInsert = this.sum(bigQueryLoad);

                if (!bigQueryQueriesElapsed.isEmpty()) {
                    ((ProcessorStats) stats).bigQueryAverageQuery = this.sum(bigQueryQueriesElapsed)
                            / bigQueryQueriesElapsed.size();
                }
            }
        }

        //if(!this.jobElapsedTimes.isEmpty()) {
        //    this.coordinators.get(0).endToEnd = this.jobElapsedTimes.get(this.jobElapsedTimes.size() - 1);
        //}
    }

    private double sum(List<Double> values) {
        double sum = 0;
        for (double value : values) {
            sum += value;
        }
        return sum;
    }

    /**
     * Parses a stopwatch-style duration string (e.g. "1.808 min" or "2.636 s")
     *
     * @param timer
     * @return the value in minutes
     */
    private double parseStopwatchTime(String timer, boolean ignoreMillis) {

        String[] parts = timer.split(" ");

        TimeUnit unit = UNITS.get(StringUtils.trim(parts[1]));
        double value = Double.parseDouble(StringUtils.trim(parts[0]));

        switch (unit) {
        case DAYS:
            value = value * 24 * 60;
            break;
        case HOURS:
            value = value * 60;
            break;
        case MICROSECONDS:
            value = 0;
            break;
        case MILLISECONDS:
            if (ignoreMillis) {
                value = 0;
            } else {
                value = value / (1000 * 60);
            }
            break;
        case MINUTES:
            break;
        case NANOSECONDS:
            value = 0;
            break;
        case SECONDS:
            value = value / 60;
            break;
        default:
            throw new IllegalArgumentException("Unknown time unit: " + unit);
        }

        return value;
    }

    /**
     * Prints all of the extracted statistics as CSV
     */
    public void printStats() {

        System.out.println(
                "------------------------------------------------ Coordinators ---------------------------------------");

        for (CoordinatorStats coordinator : this.coordinators) {
            System.out.println(CoordinatorStats.HEADER);
            System.out.println(coordinator);
            System.out.println();
        }

        // Marry up processors containing the same items, may be multiple runs
        Map<String, Set<ProcessorStats>> joined = new HashMap<>();

        System.out.println(
                "------------------------------------------------ Processors ---------------------------------------");

        Collections.sort(processors);

        System.out.println();
        System.out.println(ProcessorStats.HEADER);

        for (ProcessorStats processor : processors) {
            System.out.println(processor);

            if (!processor.fileItems.isEmpty()) {
                String fileItem = processor.fileItems.toString();

                if (joined.containsKey(fileItem)) {
                    joined.get(fileItem).add(processor);

                } else {
                    Set<ProcessorStats> stats = new HashSet<>();
                    stats.add(processor);
                    joined.put(fileItem, stats);
                }
            }
        }

        System.out.println();
        System.out.println(
                "------------------------------------------------ Joined Processors ---------------------------------------");

        StringBuilder joinedUp = new StringBuilder("File items,ExtractCountTerms2PartTask,ProcessLoadTask\n");

        for (Entry<String, Set<ProcessorStats>> entry : joined.entrySet()) {

            Set<ProcessorStats> values = entry.getValue();

            if (values.size() > 1) {

                System.out.println(
                        "------------------------------------------------ Joined up multi-experiment processors with same file items ---------------------------------------");
                System.out.println("Filename, File items,ExtractCountTerms2PartTask,ProcessLoadTask");

                double extractCountTermsAvg = 0;
                double processLoadAvg = 0;

                String fileItems = entry.getKey().replace(',', ';');

                for (ProcessorStats stats : values) {
                    System.out.println(stats.filename + ',' + fileItems + ',' + stats.extractCountTerms + ','
                            + stats.processLoad);

                    extractCountTermsAvg += stats.extractCountTerms;
                    processLoadAvg += stats.processLoad;
                }

                extractCountTermsAvg = extractCountTermsAvg / values.size();
                processLoadAvg = processLoadAvg / values.size();

                // averages
                joinedUp.append(fileItems).append(',').append(extractCountTermsAvg).append(',')
                        .append(processLoadAvg).append('\n');

                System.out.println();
            }
        }

        System.out.println(joinedUp.toString());

        System.out.println();
        System.out.println(
                "------------------------------------------------ Dictionary ---------------------------------------");

        for (DictionaryStats dictionary : this.dictionaries) {
            System.out.println(DictionaryStats.HEADER);
            System.out.println(dictionary);
            //System.out.println();
            System.out.print(dictionary.getMemoryStatsString());
            System.out.println();
        }

        System.out.println(
                "------------------------------------------------ BigQuery Export Stats ---------------------------------------");
        System.out.println("Rows,Time");
        for (Rows rows : this.bigQueryExport) {
            System.out.println(rows.rows + "," + (int) (rows.time * 60));
        }

        System.out.println(
                "------------------------------------------------ BigQuery Import Stats ---------------------------------------");
        System.out.println("Rows,Time");
        for (Rows rows : this.bigQueryImport) {
            System.out.println(rows.rows + "," + (int) (rows.time * 60));
        }
    }

    /**
     * @param args
     * @throws Exception
     */
    public static void main(String[] args) throws Exception {

        if (args.length != 1) {
            System.out.println("Usage: LogParser gs://<bucket> or LogParser /path/to/directory");
            System.exit(-1);
        }

        String path = args[0];

        LogParser parser = new LogParser(path);
        parser.parse();
        parser.printStats();
    }

    public class Stats implements Comparable<Stats> {

        String filename;
        double extractCountTerms;
        double assembleDictionary;
        double processLoad;
        double reasonPhase;

        @Override
        public int compareTo(Stats o) {
            return this.filename.compareTo(o.filename);
        }
    }

    public class MemUsage {

        int parts;
        double memory;

        /**
         * @param parts
         * @param memory
         */
        public MemUsage(int parts, double memory) {
            super();
            this.parts = parts;
            this.memory = memory;
        }
    }

    public class DictionaryStats extends Stats {

        int parts;
        int itemsAfterSchema;
        int items;
        int maxResourceId;
        double assemble;
        List<Double> memoryFootprint = new ArrayList<>();
        double nonConcurrent;
        double serialize;
        double upload;
        List<MemUsage> memoryUsage = new ArrayList<>();

        /**
         * @return the most recently recorded memory usage, or zero if none was recorded
         */
        public double getLatestMemoryUsage() {
            double memory = 0;
            if (!memoryFootprint.isEmpty()) {
                memory = this.memoryFootprint.get(this.memoryFootprint.size() - 1);
            }
            return memory;
        }

        static final String HEADER = "Filename, Parts, Dictionary Items After Schema, Dictionary Items,Max Resource Id, Assemble Time (min),Min memory (GB), "
                + "Max memory(GB),Create Non Concurrent(sec), Serialize Time(min), Upload Time (sec)";

        /* (non-Javadoc)
         * @see java.lang.Object#toString()
         */
        @Override
        public String toString() {

            double minMem = 0;
            double maxMem = 0;

            if (!this.memoryFootprint.isEmpty()) {
                Collections.sort(this.memoryFootprint);
                minMem = this.memoryFootprint.get(0);
                maxMem = this.memoryFootprint.get(this.memoryFootprint.size() - 1);
            }

            return new StringBuilder(filename).append(',').append(parts).append(',').append(itemsAfterSchema)
                    .append(',').append(items).append(',').append(maxResourceId).append(',').append(assemble)
                    .append(',').append(minMem).append(',').append(maxMem).append(',').append(nonConcurrent * 60)
                    .append(',').append(serialize).append(',').append(upload * 60).toString();
        }

        /**
         * @return the dictionary size vs. memory usage pairs as CSV
         */
        public String getMemoryStatsString() {
            StringBuilder memory = new StringBuilder("Dictionary Size, Memory\n");
            for (MemUsage usage : this.memoryUsage) {
                memory.append(usage.parts).append(',').append(usage.memory).append('\n');
            }
            return memory.toString();
        }
    }

    public class CoordinatorStats extends Stats {

        double evmAcquis;
        //double loadPhase;
        double bigQueryLoad;
        double endToEnd;
        String vmType;
        int numOfProcessors;

        //List<Double> jobElapsedTimes = new ArrayList<>();

        static final String HEADER = "Filename,VM Type, No of VMs,EVM acquisition time (min),ExtractCountTerms2PartTask,AssembleDictionaryTask,ProcessLoadTask,"
                + "Loading Phase,Bigquery load time (sec),Reasoning Phase (min),End to End (min)";

        /* (non-Javadoc)
         * @see java.lang.Object#toString()
         */
        @Override
        public String toString() {
            return new StringBuilder(filename).append(',').append(vmType).append(',').append(numOfProcessors)
                    .append(',').append(evmAcquis).append(',').append(extractCountTerms).append(',')
                    .append(assembleDictionary).append(',').append(processLoad).append(',').append(',')
                    .append(bigQueryLoad * 60).append(',').append(reasonPhase).append(',').append(endToEnd)
                    .toString();
        }
    }

    public class Rows {

        int rows;
        double time;

        /**
         * @param rows
         * @param time
         */
        public Rows(int rows, double time) {
            super();
            this.rows = rows;
            this.time = time;
        }
    }

    public class ProcessorStats extends Stats {

        int inferred;
        int bigQueryRows;
        double bigQuerySave;
        double bigQueryProcBytes;
        double serialInFile;
        double retries;
        double bigQueryAverageQuery;
        double bigQueryInsert;
        double dictionaryDownload;
        double dictionaryLoad;
        double dictionaryMemBefore;
        double dictionaryMemAfter;

        List<String> fileItems = new ArrayList<>();

        static final String HEADER = "Filename,ExtractCountTerms2PartTask,AssembleDictionaryTask,ProcessLoadTask,Inferred,Retrieved Bigquery Rows,"
                + "Bigquery results save time (min),Big Query Table size GB,Big Query Table rows,Bigquery Total Bytes Processed (GB),"
                + "Node Reasoning Time(sec),8 retries with 10s sleep,Bigquery average Query time (sec),Bigquery Reason insert(min),Reasoning Phase (min),"
                + "Dictionary Download (s), Dictionary Load (min), Dictionary Memory Before (GB), Dictionary Memory After (GB)";

        /* (non-Javadoc)
         * @see java.lang.Object#toString()
         */
        @Override
        public String toString() {
            return new StringBuilder(filename).append(',').append(extractCountTerms).append(',')
                    .append(assembleDictionary).append(',').append(processLoad).append(',').append(inferred)
                    .append(',').append(bigQueryRows).append(',').append(bigQuerySave).append(',').append(',')
                    .append(',').append(bigQueryProcBytes).append(',').append(serialInFile * 60).append(',')
                    .append(retries).append(',').append(bigQueryAverageQuery * 60).append(',')
                    .append(bigQueryInsert).append(',').append(reasonPhase).append(',')
                    .append(dictionaryDownload * 60).append(',').append(dictionaryLoad).append(',')
                    .append(dictionaryMemBefore).append(',').append(dictionaryMemAfter).toString();
        }
    }
}
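A minimal invocation sketch, assuming the compiled classes and their dependencies (cloudex, commons-lang, guava and the rest of ecarf-core) are on the classpath; the bucket and directory names below are placeholders:

    # parse .log files previously uploaded to a Cloud Storage bucket
    java -cp <classpath> io.ecarf.core.utils.LogParser gs://my-ecarf-logs

    # parse coordinator/processor .log files from a local directory
    java -cp <classpath> io.ecarf.core.utils.LogParser /var/logs/ecarf

As main() shows, the parser expects exactly one argument and exits with the usage message otherwise.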