/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package hitune.analysis.mapreduce.processor;

import hitune.analysis.mapreduce.AnalysisProcessorConfiguration;
import hitune.analysis.mapreduce.CSVFileOutputFormat;
import hitune.analysis.mapreduce.HiTuneKey;
import hitune.analysis.mapreduce.HiTuneRecord;
import hitune.analysis.mapreduce.MultiSequenceFileInputFormat;
import hitune.analysis.mapreduce.TextArrayWritable;

import java.io.IOException;
import java.io.StringReader;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.record.Record;
import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Re-organizes the instrumented data for Map tasks and computes the statistics.
 */
public class InstrumentDataflow extends AnalysisProcessor {
    static Logger log = Logger.getLogger(InstrumentDataflow.class);
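    // The "phases" job property is expected to carry an XML fragment that
    // parsePhase() below wraps in a <root> element and parses. Based on the
    // tag names that method reads, a hypothetical configuration could look
    // like the following (illustrative only; the element name actually
    // matched is AnalysisProcessorConfiguration.phase, so the <phase> tag
    // and all values here are assumptions):
    //
    //   <phase>
    //     <phasename>spill</phasename>
    //     <stack>org.apache.hadoop.mapred.MapTask$MapOutputBuffer.sortAndSpill</stack>
    //     <functions>sortAndSpill,compress</functions>
    //   </phase>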
    /**
     * Gets each phase's metrics, including:
     * 1. its function list and function sampling count
     * 2. its start and end time
     * 3. its status list and status count
     * 4. its function-status count
     * <br> Each output record represents one sampling point.
     */
    public static class MapClass<K extends Record, V extends Record> extends MapReduceBase
            implements Mapper<K, V, K, V> {
        JobConf conf = null;
        List<String> nodelist = new ArrayList<String>();
        Map<String, List<String>> phases = new HashMap<String, List<String>>();
        Map<String, String> phasealias = new HashMap<String, String>();
        List<String> statuslist = new ArrayList<String>();

        @Override
        public void configure(JobConf jobConf) {
            super.configure(jobConf);
            this.conf = jobConf;
            init();
        }

        void parsePhase() {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            // Now use the factory to create a DOM parser (a.k.a. a DocumentBuilder)
            DocumentBuilder parser;
            try {
                parser = factory.newDocumentBuilder();
                // Parse the fragment and build a Document tree to represent its content
                Document document = parser.parse(new InputSource(
                        new StringReader("<root>" + conf.get("phases") + "</root>")));
                // Ask the document for a list of all phases
                NodeList rows = document.getElementsByTagName(AnalysisProcessorConfiguration.phase);
                int phasenumber = rows.getLength();
                for (int i = 0; i < phasenumber; i++) {
                    Node phase = rows.item(i);
                    NodeList fields = phase.getChildNodes();
                    String phasename = null;
                    String stacks = null;
                    String funcs = null;
                    List<String> functionlist = new ArrayList<String>();
                    for (int j = 0; j < fields.getLength(); j++) {
                        Node fieldNode = fields.item(j);
                        if (!(fieldNode instanceof Element))
                            continue;
                        Element field = (Element) fieldNode;
                        if ("phasename".equals(field.getTagName()) && field.hasChildNodes())
                            phasename = ((org.w3c.dom.Text) field.getFirstChild()).getData().trim();
                        else if ("stack".equals(field.getTagName()) && field.hasChildNodes())
                            stacks = ((org.w3c.dom.Text) field.getFirstChild()).getData();
                        else if ("functions".equals(field.getTagName()) && field.hasChildNodes())
                            funcs = ((org.w3c.dom.Text) field.getFirstChild()).getData();
                    }
                    if (stacks != null && stacks.length() != 0)
                        stacks = stacks.replace(" ", "");
                    else
                        stacks = "";
                    phasealias.put(stacks, phasename);
                    if (funcs == null) {
                        continue;
                    }
                    for (String func : funcs.split(SEPERATOR_COMMA)) {
                        functionlist.add(func);
                    }
                    this.phases.put(stacks, functionlist);
                }
            } catch (ParserConfigurationException e) {
                log.warn(e);
                e.printStackTrace();
            } catch (SAXException e) {
                log.warn(e);
                e.printStackTrace();
            } catch (IOException e) {
                log.warn(e);
                e.printStackTrace();
            }
        }

        private void init() {
            String nodes = conf.get(AnalysisProcessorConfiguration.nodes);
            this.nodelist = String2List(nodes, SEPERATOR_COMMA);
            String status = conf.get("status");
            this.statuslist = String2List(status, SEPERATOR_COMMA);
            parsePhase();
        }

        /**
         * Matches each pattern in the list against the destination string and
         * emits a comma-separated 0/1 vector, one element per pattern.
         */
        private String count(String dest, List<String> patternList) {
            StringBuilder results = new StringBuilder();
            if (dest == null || patternList == null || patternList.size() <= 0) {
                return "";
            }
            for (String pattern : patternList) {
                Pattern p = Pattern.compile(pattern);
                Matcher matcher = p.matcher(dest);
                if (matcher.find()) {
                    results.append("1").append(SEPERATOR_COMMA);
                } else {
                    results.append("0").append(SEPERATOR_COMMA);
                }
            }
            log.debug("results:" + results.toString());
            return results.toString().substring(0, results.length() - 1);
        }
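        // A hypothetical illustration of count(): with statuslist = ["RUNNABLE",
        // "WAITING"] and a sample whose ThreadState is "RUNNABLE", the method
        // returns "1,0" -- a per-pattern presence vector that the reducer later
        // sums element-wise. (The pattern values here are assumed for the example.)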
        @Override
        public void map(K key, V value, OutputCollector<K, V> output, Reporter reporter)
                throws IOException {
            // Do the filtering and emit
            // <key, value> =
            // <[AttemptID/PhaseStack/PhaseAlias],
            //  [ThreadName, ThreadId, starttime, endtime, funlist, funcountlist,
            //   statelist, statecountlist, funStateMatrix]>
            HiTuneRecord valproxy = new HiTuneRecord(value);
            String hostname = valproxy.getHost();
            String status = valproxy.getValue("ThreadState");
            String stack = valproxy.getValue("CallStack");
            String attemptID = valproxy.getValue("TaskID");
            log.debug("hostname:" + hostname + " ThreadState:" + status
                    + " stack:" + stack + " attemptID:" + attemptID);
            if (isMatched(this.nodelist, hostname)) {
                for (String s : phasealias.keySet()) {
                    log.debug("phasealias:" + s);
                    if (s == null || s.length() == 0)
                        s = "";
                    Pattern p = Pattern.compile(s);
                    if (stack != null && stack.length() != 0)
                        stack = stack.replace(" ", "");
                    else
                        stack = "";
                    Matcher matcher = p.matcher(stack);
                    if (matcher.find()) {
                        try {
                            log.debug("find pattern");
                            K newkey = (K) key.getClass().getConstructor().newInstance();
                            V newval = (V) value.getClass().getConstructor().newInstance();
                            HiTuneKey newkeyproxy = new HiTuneKey(newkey);
                            HiTuneRecord newvalproxy = new HiTuneRecord(newval);
                            newkeyproxy.setKey(attemptID + "/" + s + "/" + phasealias.get(s));
                            newkeyproxy.setDataType(new HiTuneKey(key).getDataType());
                            newvalproxy.copyCommonFields(value);
                            newvalproxy.add("thread_id", valproxy.getValue("ThreadID"));
                            newvalproxy.add("thread_name", valproxy.getValue("ThreadName"));
                            newvalproxy.add("attempt_id", attemptID);
                            newvalproxy.add("phase_stack", s);
                            newvalproxy.add("phase_name", phasealias.get(s));
                            newvalproxy.add("start", "" + newvalproxy.getTime());
                            newvalproxy.add("count", "1");
                            log.debug("status:" + conf.get("status"));
                            newvalproxy.add("statusList", conf.get("status"));
                            newvalproxy.add("statusCount", count(status, this.statuslist));
                            log.debug("funList:" + this.phases.get(s));
                            newvalproxy.add("funList", List2String(this.phases.get(s), SEPERATOR_COMMA));
                            newvalproxy.add("funCount", count(stack, this.phases.get(s)));
                            newvalproxy.add(AnalysisProcessorConfiguration.jobid,
                                    conf.get(AnalysisProcessorConfiguration.jobid));
                            log.debug("Key:" + newkeyproxy.toString() + " Record:" + newvalproxy.toString());
                            output.collect((K) newkeyproxy.getObject(), (V) newvalproxy.getObject());
                        } catch (IllegalArgumentException e) {
                            log.warn(e);
                            e.printStackTrace();
                        } catch (SecurityException e) {
                            log.warn(e);
                            e.printStackTrace();
                        } catch (InstantiationException e) {
                            log.warn(e);
                            e.printStackTrace();
                        } catch (IllegalAccessException e) {
                            log.warn(e);
                            e.printStackTrace();
                        } catch (InvocationTargetException e) {
                            log.warn(e);
                            e.printStackTrace();
                        } catch (NoSuchMethodException e) {
                            log.warn(e);
                            e.printStackTrace();
                        }
                    }
                }
            }
        }
    }
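    // For illustration: if a sample from task attempt
    // "attempt_200906152237_0001_m_000000_0" matches a phase whose stack
    // pattern is "org.apache.hadoop.mapred.MapTask$MapOutputBuffer.sortAndSpill"
    // aliased to "spill", the mapper emits the key
    //   attempt_200906152237_0001_m_000000_0/org.apache.hadoop.mapred.MapTask$MapOutputBuffer.sortAndSpill/spill
    // (The attempt id and pattern are hypothetical examples, not confirmed values.)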
    /**
     * Calculates each phase's statistics:
     * 1. choose the minimum start time as the phase start time.
     * 2. choose the maximum end time as the phase end time.
     * 3. sum the sampling counts per status.
     * 4. sum the function sampling counts.
     * 5. sum the phase's sample count.
     * The analyzer does not determine whether a phase is contiguous in the time sequence.
     */
    public static class ReduceClass<K extends Record, V extends Record> extends MapReduceBase
            implements Reducer<K, V, Text, TextArrayWritable> {
        // Guards the one-time emission of the CSV header row (per reducer JVM).
        static boolean initialized = false;

        /**
         * Adds two equal-length count vectors element-wise.
         * @param a first vector, a separator-joined list of integers
         * @param b second vector
         * @param seperator the list separator
         * @return the element-wise sum, or "" if either vector is missing or the lengths differ
         */
        String vectorAdd(String a, String b, String seperator) {
            StringBuilder result = new StringBuilder();
            List<String> list_a = String2List(a, seperator);
            List<String> list_b = String2List(b, seperator);
            if (list_a == null || list_b == null || list_a.size() != list_b.size()
                    || list_a.size() == 0 || list_b.size() == 0) {
                return "";
            }
            for (int i = 0; i < list_a.size(); i++) {
                int _a = Integer.parseInt(list_a.get(i));
                int _b = Integer.parseInt(list_b.get(i));
                int sum = _a + _b;
                result.append(sum).append(seperator);
            }
            return result.toString().substring(0, result.length() - seperator.length());
        }
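        // Hypothetical illustration of vectorAdd:
        //   vectorAdd("1,0,2", "0,1,1", ",") returns "1,1,3"
        //   vectorAdd("1,0", "1,0,0", ",")  returns "" (length mismatch)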
        @Override
        public void reduce(K key, Iterator<V> values,
                OutputCollector<Text, TextArrayWritable> output, Reporter reporter)
                throws IOException {
            // Organize the aggregated phase statistics into CSV format
            Map<String, String> newRecord = new HashMap<String, String>();
            String[] headers = new String[] { "attempt_id", "breakdown_count", "breakdown_name",
                    "breakdown_type", "host", "job_id", "phase_count", "phase_end", "phase_name",
                    "phase_stack", "phase_start", "thread_id", "thread_name" };
            for (String head : headers) {
                newRecord.put(head, "");
            }
            long start = -1, end = -1;
            long phaseCount = 0;
            String funcCount = "", statusCount = "";
            String funcList = "", statusList = "";
            while (values.hasNext()) {
                HiTuneRecord valproxy = new HiTuneRecord(values.next());
                // Each record is a single sampling point, so its "start" time
                // serves as both the start and the end of that sample.
                long phaseStart = Long.parseLong(valproxy.getValue("start"));
                long phaseEnd = Long.parseLong(valproxy.getValue("start"));
                start = start == -1 ? phaseStart : Math.min(start, phaseStart);
                end = end == -1 ? phaseEnd : Math.max(end, phaseEnd);
                phaseCount++;
                funcCount = funcCount.isEmpty() ? valproxy.getValue("funCount")
                        : vectorAdd(valproxy.getValue("funCount"), funcCount, SEPERATOR_COMMA);
                statusCount = statusCount.isEmpty() ? valproxy.getValue("statusCount")
                        : vectorAdd(valproxy.getValue("statusCount"), statusCount, SEPERATOR_COMMA);
                newRecord.put("host", valproxy.getHost());
                newRecord.put("job_id", valproxy.getValue(AnalysisProcessorConfiguration.jobid));
                newRecord.put("phase_stack", valproxy.getValue("phase_stack"));
                newRecord.put("phase_name", valproxy.getValue("phase_name"));
                newRecord.put("attempt_id", valproxy.getValue("attempt_id"));
                newRecord.put("thread_id", valproxy.getValue("thread_id"));
                newRecord.put("thread_name", valproxy.getValue("thread_name"));
                funcList = valproxy.getValue("funList");
                statusList = valproxy.getValue("statusList");
            }
            newRecord.put("phase_start", "" + start);
            newRecord.put("phase_end", "" + end);
            newRecord.put("phase_count", "" + phaseCount);
            if (!initialized) {
                // Emit the CSV header row once, using the same key iteration
                // order as the content rows below.
                TextArrayWritable newValue =
                        new TextArrayWritable(newRecord.keySet().toArray(new String[0]));
                output.collect(null, newValue);
                initialized = true;
            }
            if (!funcCount.equals("")) {
                newRecord.put("breakdown_type", "function");
                log.debug("funcList: " + funcList);
                log.debug("funcCount: " + funcCount);
                List<String> tmp = String2List(funcList, SEPERATOR_COMMA);
                List<String> counts = String2List(funcCount, SEPERATOR_COMMA);
                for (int i = 0; i < tmp.size(); i++) {
                    log.debug("function:" + tmp.get(i) + " count:" + counts.get(i));
                    newRecord.put("breakdown_name", tmp.get(i));
                    newRecord.put("breakdown_count", "" + counts.get(i));
                    String[] contents = new String[newRecord.keySet().size()];
                    int j = 0;
                    for (String index : newRecord.keySet()) {
                        contents[j] = newRecord.get(index);
                        log.debug("content: " + index + "," + contents[j]);
                        j++;
                    }
                    TextArrayWritable newValue = new TextArrayWritable(contents);
                    output.collect(null, newValue);
                    contents = null;
                }
            }
            if (!statusCount.equals("")) {
                newRecord.put("breakdown_type", "state");
                log.debug("statusList: " + statusList);
                log.debug("statusCount: " + statusCount);
                List<String> tmp = String2List(statusList, SEPERATOR_COMMA);
                List<String> counts = String2List(statusCount, SEPERATOR_COMMA);
                for (int i = 0; i < tmp.size(); i++) {
                    log.debug("state:" + tmp.get(i) + " count:" + counts.get(i));
                    newRecord.put("breakdown_name", tmp.get(i));
                    newRecord.put("breakdown_count", "" + counts.get(i));
                    String[] contents = new String[newRecord.size()];
                    int j = 0;
                    for (String index : newRecord.keySet()) {
                        contents[j] = newRecord.get(index);
                        log.debug("content: " + index + "," + contents[j]);
                        j++;
                    }
                    TextArrayWritable newValue = new TextArrayWritable(contents);
                    output.collect(null, newValue);
                    contents = null;
                }
            }
        }
    }
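    // For illustration, each reducer output row is one CSV line whose columns
    // follow newRecord's key iteration order. A hypothetical "function" breakdown
    // row could carry values such as breakdown_count=3, breakdown_name=sortAndSpill,
    // breakdown_type=function, alongside the attempt_id, host, job_id, phase_*,
    // and thread_* fields populated above. (Values are examples only.)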
    /**
     * @param conf
     */
    public InstrumentDataflow(Configuration conf) {
        super(conf);
    }

    /* (non-Javadoc)
     * @see org.apache.hadoop.chukwa.analysis.HiTune.AnalysisProcessor#run()
     */
    @Override
    public void run() {
        long timestamp = System.currentTimeMillis();
        JobConf conf = new JobConf(this.conf, InstrumentDataflow.class);
        try {
            conf.setJobName(this.getClass().getSimpleName() + timestamp);
            conf.setInputFormat(MultiSequenceFileInputFormat.class);
            conf.setMapperClass(InstrumentDataflow.MapClass.class);
            conf.setReducerClass(InstrumentDataflow.ReduceClass.class);
            conf.setOutputKeyClass(Text.class);
            Class<? extends WritableComparable> outputKeyClass = Class
                    .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                    .asSubclass(WritableComparable.class);
            Class<? extends Writable> outputValueClass = Class
                    .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                    .asSubclass(Writable.class);
            conf.setMapOutputKeyClass(outputKeyClass);
            conf.setMapOutputValueClass(outputValueClass);
            conf.setOutputValueClass(TextArrayWritable.class);
            conf.setOutputFormat(CSVFileOutputFormat.class);

            String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                    + conf.get(AnalysisProcessorConfiguration.reportfile);
            String temp_outputPaths = getTempOutputDir(outputPaths);
            if (this.inputfiles != null) {
                log.debug("inputPaths:" + inputfiles);
                FileInputFormat.setInputPaths(conf, inputfiles);
                FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));
                //FileInputFormat.setInputPathFilter(conf, evtFileFilter.class);
                //conf.setNumReduceTasks(1);
                try {
                    JobClient.runJob(conf);
                    moveResults(conf, outputPaths, temp_outputPaths);
                } catch (IOException e) {
                    log.warn("For " + getOutputFileName() + " :JOB fails!");
                    log.warn(e);
                    e.printStackTrace();
                    this.MOVE_DONE = false;
                }
            } else {
                log.warn("For " + getOutputFileName() + " :No input path!");
            }
        } catch (Exception e) {
            log.warn("Job preparation failure!");
            log.warn(e);
            e.printStackTrace();
        }
    }

    /**
     * @param args
     */
    public static void main(String[] args) {
        // No standalone entry point; the processor is driven through run().
    }
}
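// A minimal usage sketch (an assumption, not part of the original file): the
// processor is expected to be constructed with a populated Hadoop Configuration
// and executed via run(), e.g. inside a HiTune analysis driver:
//
//   Configuration conf = new Configuration();
//   // ... set phases, status, nodes, report folder/file, input paths, etc. ...
//   new InstrumentDataflow(conf).run();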