Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.chukwa.dataloader; import java.io.IOException; import java.net.URISyntaxException; import java.sql.Connection; import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.sql.Statement; import java.text.SimpleDateFormat; import java.util.Date; import java.util.HashMap; import java.util.Iterator; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.concurrent.Callable; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.chukwa.conf.ChukwaConfiguration; import org.apache.hadoop.chukwa.database.DatabaseConfig; import org.apache.hadoop.chukwa.extraction.engine.ChukwaRecord; import org.apache.hadoop.chukwa.extraction.engine.ChukwaRecordKey; import org.apache.hadoop.chukwa.extraction.engine.RecordUtil; import org.apache.hadoop.chukwa.util.ClusterConfig; import org.apache.hadoop.chukwa.util.DatabaseWriter; import org.apache.hadoop.chukwa.util.ExceptionUtil; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.SequenceFile; public class MetricDataLoader implements Callable { private static Log log = LogFactory.getLog(MetricDataLoader.class); private Statement stmt = null; private ResultSet rs = null; private DatabaseConfig mdlConfig = null; private HashMap<String, String> normalize = null; private HashMap<String, String> transformer = null; private HashMap<String, Float> conversion = null; private HashMap<String, String> dbTables = null; private HashMap<String, HashMap<String, Integer>> dbSchema = null; private String newSpace = "-"; private boolean batchMode = true; private Connection conn = null; private Path source = null; private static ChukwaConfiguration conf = null; private static FileSystem fs = null; private String jdbc_url = ""; /** Creates a new instance of DBWriter */ public MetricDataLoader(ChukwaConfiguration conf, FileSystem fs, String fileName) { source = new Path(fileName); this.conf = conf; this.fs = fs; } private void initEnv(String cluster) throws Exception { mdlConfig = new DatabaseConfig(); transformer = mdlConfig.startWith("metric."); conversion = new HashMap<String, Float>(); normalize = mdlConfig.startWith("normalize."); dbTables = mdlConfig.startWith("report.db.name."); Iterator<?> entries = mdlConfig.iterator(); while (entries.hasNext()) { String entry = entries.next().toString(); if (entry.startsWith("conversion.")) { String[] metrics = entry.split("="); try { float convertNumber = Float.parseFloat(metrics[1]); conversion.put(metrics[0], convertNumber); } catch (NumberFormatException ex) { log.error(metrics[0] + " is not a number."); } } } log.debug("cluster name:" + cluster); if (!cluster.equals("")) { ClusterConfig cc = new ClusterConfig(); jdbc_url = cc.getURL(cluster); } try { DatabaseWriter dbWriter = new DatabaseWriter(cluster); conn = dbWriter.getConnection(); } catch (Exception ex) { throw new Exception("JDBC URL does not exist for:" + jdbc_url); } log.debug("Initialized JDBC URL: " + jdbc_url); HashMap<String, String> dbNames = mdlConfig.startWith("report.db.name."); Iterator<String> ki = dbNames.keySet().iterator(); dbSchema = new HashMap<String, HashMap<String, Integer>>(); while (ki.hasNext()) { String recordType = ki.next().toString(); String table = dbNames.get(recordType); try { ResultSet rs = conn.getMetaData().getColumns(null, null, table + "_template", null); HashMap<String, Integer> tableSchema = new HashMap<String, Integer>(); while (rs.next()) { String name = rs.getString("COLUMN_NAME"); int type = rs.getInt("DATA_TYPE"); tableSchema.put(name, type); StringBuilder metricName = new StringBuilder(); metricName.append("metric."); metricName.append(recordType.substring(15)); metricName.append("."); metricName.append(name); String mdlKey = metricName.toString().toLowerCase(); if (!transformer.containsKey(mdlKey)) { transformer.put(mdlKey, name); } } rs.close(); dbSchema.put(table, tableSchema); } catch (SQLException ex) { log.debug("table: " + table + " template does not exist, MDL will not load data for this table."); } } stmt = conn.createStatement(); conn.setAutoCommit(false); } public void interrupt() { } private String escape(String s, String c) { String ns = s.trim(); Pattern pattern = Pattern.compile(" +"); Matcher matcher = pattern.matcher(ns); String s2 = matcher.replaceAll(c); return s2; } public static String escapeQuotes(String s) { StringBuffer sb = new StringBuffer(); int index; int length = s.length(); char ch; for (index = 0; index < length; ++index) { if ((ch = s.charAt(index)) == '\"') { sb.append("\\\""); } else if (ch == '\\') { sb.append("\\\\"); } else { sb.append(ch); } } return (sb.toString()); } public boolean run() { boolean first = true; log.info("StreamName: " + source.getName()); SequenceFile.Reader reader = null; try { // The newInstance() call is a work around for some // broken Java implementations reader = new SequenceFile.Reader(fs, source, conf); } catch (Exception ex) { // handle the error log.error(ex, ex); } long currentTimeMillis = System.currentTimeMillis(); boolean isSuccessful = true; String recordType = null; ChukwaRecordKey key = new ChukwaRecordKey(); ChukwaRecord record = new ChukwaRecord(); String cluster = null; int numOfRecords = 0; try { Pattern p = Pattern.compile("(.*)\\-(\\d+)$"); int batch = 0; while (reader.next(key, record)) { numOfRecords++; if (first) { try { cluster = RecordUtil.getClusterName(record); initEnv(cluster); first = false; } catch (Exception ex) { log.error("Initialization failed for: " + cluster + ". Please check jdbc configuration."); return false; } } String sqlTime = DatabaseWriter.formatTimeStamp(record.getTime()); log.debug("Timestamp: " + record.getTime()); log.debug("DataType: " + key.getReduceType()); String[] fields = record.getFields(); String table = null; String[] priKeys = null; HashMap<String, HashMap<String, String>> hashReport = new HashMap<String, HashMap<String, String>>(); StringBuilder normKey = new StringBuilder(); String node = record.getValue("csource"); recordType = key.getReduceType().toLowerCase(); String dbKey = "report.db.name." + recordType; Matcher m = p.matcher(recordType); if (dbTables.containsKey(dbKey)) { String[] tmp = mdlConfig.findTableName(mdlConfig.get(dbKey), record.getTime(), record.getTime()); table = tmp[0]; } else if (m.matches()) { String timePartition = "_week"; int timeSize = Integer.parseInt(m.group(2)); if (timeSize == 5) { timePartition = "_month"; } else if (timeSize == 30) { timePartition = "_quarter"; } else if (timeSize == 180) { timePartition = "_year"; } else if (timeSize == 720) { timePartition = "_decade"; } int partition = (int) (record.getTime() / timeSize); StringBuilder tmpDbKey = new StringBuilder(); tmpDbKey.append("report.db.name."); tmpDbKey.append(m.group(1)); if (dbTables.containsKey(tmpDbKey.toString())) { StringBuilder tmpTable = new StringBuilder(); tmpTable.append(dbTables.get(tmpDbKey.toString())); tmpTable.append("_"); tmpTable.append(partition); tmpTable.append("_"); tmpTable.append(timePartition); table = tmpTable.toString(); } else { log.debug(tmpDbKey.toString() + " does not exist."); continue; } } else { log.debug(dbKey + " does not exist."); continue; } log.debug("table name:" + table); try { priKeys = mdlConfig.get("report.db.primary.key." + recordType).split(","); } catch (Exception nullException) { } for (String field : fields) { String keyName = escape(field.toLowerCase(), newSpace); String keyValue = escape(record.getValue(field).toLowerCase(), newSpace); StringBuilder buildKey = new StringBuilder(); buildKey.append("normalize."); buildKey.append(recordType); buildKey.append("."); buildKey.append(keyName); if (normalize.containsKey(buildKey.toString())) { if (normKey.toString().equals("")) { normKey.append(keyName); normKey.append("."); normKey.append(keyValue); } else { normKey.append("."); normKey.append(keyName); normKey.append("."); normKey.append(keyValue); } } StringBuilder normalizedKey = new StringBuilder(); normalizedKey.append("metric."); normalizedKey.append(recordType); normalizedKey.append("."); normalizedKey.append(normKey); if (hashReport.containsKey(node)) { HashMap<String, String> tmpHash = hashReport.get(node); tmpHash.put(normalizedKey.toString(), keyValue); hashReport.put(node, tmpHash); } else { HashMap<String, String> tmpHash = new HashMap<String, String>(); tmpHash.put(normalizedKey.toString(), keyValue); hashReport.put(node, tmpHash); } } for (String field : fields) { String valueName = escape(field.toLowerCase(), newSpace); String valueValue = escape(record.getValue(field).toLowerCase(), newSpace); StringBuilder buildKey = new StringBuilder(); buildKey.append("metric."); buildKey.append(recordType); buildKey.append("."); buildKey.append(valueName); if (!normKey.toString().equals("")) { buildKey = new StringBuilder(); buildKey.append("metric."); buildKey.append(recordType); buildKey.append("."); buildKey.append(normKey); buildKey.append("."); buildKey.append(valueName); } String normalizedKey = buildKey.toString(); if (hashReport.containsKey(node)) { HashMap<String, String> tmpHash = hashReport.get(node); tmpHash.put(normalizedKey, valueValue); hashReport.put(node, tmpHash); } else { HashMap<String, String> tmpHash = new HashMap<String, String>(); tmpHash.put(normalizedKey, valueValue); hashReport.put(node, tmpHash); } } Iterator<String> i = hashReport.keySet().iterator(); while (i.hasNext()) { Object iteratorNode = i.next(); HashMap<String, String> recordSet = hashReport.get(iteratorNode); Iterator<String> fi = recordSet.keySet().iterator(); // Map any primary key that was not included in the report keyName StringBuilder sqlPriKeys = new StringBuilder(); try { for (String priKey : priKeys) { if (priKey.equals("timestamp")) { sqlPriKeys.append(priKey); sqlPriKeys.append(" = \""); sqlPriKeys.append(sqlTime); sqlPriKeys.append("\""); } if (!priKey.equals(priKeys[priKeys.length - 1])) { sqlPriKeys.append(sqlPriKeys); sqlPriKeys.append(", "); } } } catch (Exception nullException) { // ignore if primary key is empty } // Map the hash objects to database table columns StringBuilder sqlValues = new StringBuilder(); boolean firstValue = true; while (fi.hasNext()) { String fieldKey = fi.next(); if (transformer.containsKey(fieldKey) && transformer.get(fieldKey).intern() != "_delete".intern()) { if (!firstValue) { sqlValues.append(", "); } try { if (dbSchema.get(dbTables.get(dbKey)) .get(transformer.get(fieldKey)) == java.sql.Types.VARCHAR || dbSchema.get(dbTables.get(dbKey)) .get(transformer.get(fieldKey)) == java.sql.Types.BLOB) { String conversionKey = "conversion." + fieldKey; if (conversion.containsKey(conversionKey)) { sqlValues.append(transformer.get(fieldKey)); sqlValues.append("="); sqlValues.append(recordSet.get(fieldKey)); sqlValues.append(conversion.get(conversionKey).toString()); } else { sqlValues.append(transformer.get(fieldKey)); sqlValues.append("=\'"); sqlValues.append(escapeQuotes(recordSet.get(fieldKey))); sqlValues.append("\'"); } } else if (dbSchema.get(dbTables.get(dbKey)) .get(transformer.get(fieldKey)) == java.sql.Types.TIMESTAMP) { SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); Date recordDate = new Date(); recordDate.setTime(Long.parseLong(recordSet.get(fieldKey))); sqlValues.append(transformer.get(fieldKey)); sqlValues.append("=\""); sqlValues.append(formatter.format(recordDate)); sqlValues.append("\""); } else if (dbSchema.get(dbTables.get(dbKey)) .get(transformer.get(fieldKey)) == java.sql.Types.BIGINT || dbSchema.get(dbTables.get(dbKey)) .get(transformer.get(fieldKey)) == java.sql.Types.TINYINT || dbSchema.get(dbTables.get(dbKey)) .get(transformer.get(fieldKey)) == java.sql.Types.INTEGER) { long tmp = 0; try { tmp = Long.parseLong(recordSet.get(fieldKey).toString()); String conversionKey = "conversion." + fieldKey; if (conversion.containsKey(conversionKey)) { tmp = tmp * Long.parseLong(conversion.get(conversionKey).toString()); } } catch (Exception e) { tmp = 0; } sqlValues.append(transformer.get(fieldKey)); sqlValues.append("="); sqlValues.append(tmp); } else { double tmp = 0; tmp = Double.parseDouble(recordSet.get(fieldKey).toString()); String conversionKey = "conversion." + fieldKey; if (conversion.containsKey(conversionKey)) { tmp = tmp * Double.parseDouble(conversion.get(conversionKey).toString()); } if (Double.isNaN(tmp)) { tmp = 0; } sqlValues.append(transformer.get(fieldKey)); sqlValues.append("="); sqlValues.append(tmp); } firstValue = false; } catch (NumberFormatException ex) { String conversionKey = "conversion." + fieldKey; if (conversion.containsKey(conversionKey)) { sqlValues.append(transformer.get(fieldKey)); sqlValues.append("="); sqlValues.append(recordSet.get(fieldKey)); sqlValues.append(conversion.get(conversionKey).toString()); } else { sqlValues.append(transformer.get(fieldKey)); sqlValues.append("='"); sqlValues.append(escapeQuotes(recordSet.get(fieldKey))); sqlValues.append("'"); } firstValue = false; } catch (NullPointerException ex) { log.error("dbKey:" + dbKey + " fieldKey:" + fieldKey + " does not contain valid MDL structure."); } } } StringBuilder sql = new StringBuilder(); if (sqlPriKeys.length() > 0) { sql.append("INSERT INTO "); sql.append(table); sql.append(" SET "); sql.append(sqlPriKeys.toString()); sql.append(","); sql.append(sqlValues.toString()); sql.append(" ON DUPLICATE KEY UPDATE "); sql.append(sqlPriKeys.toString()); sql.append(","); sql.append(sqlValues.toString()); sql.append(";"); } else { if (sqlValues.length() > 0) { sql.append("INSERT INTO "); sql.append(table); sql.append(" SET "); sql.append(sqlValues.toString()); sql.append(" ON DUPLICATE KEY UPDATE "); sql.append(sqlValues.toString()); sql.append(";"); } } if (sql.length() > 0) { log.trace(sql); if (batchMode) { stmt.addBatch(sql.toString()); batch++; } else { stmt.execute(sql.toString()); } if (batchMode && batch > 20000) { int[] updateCounts = stmt.executeBatch(); log.info("Batch mode inserted=" + updateCounts.length + "records."); batch = 0; } } } } if (batchMode) { int[] updateCounts = stmt.executeBatch(); log.info("Batch mode inserted=" + updateCounts.length + "records."); } } catch (SQLException ex) { // handle any errors isSuccessful = false; log.error(ex, ex); log.error("SQLException: " + ex.getMessage()); log.error("SQLState: " + ex.getSQLState()); log.error("VendorError: " + ex.getErrorCode()); } catch (Exception e) { isSuccessful = false; log.error(ExceptionUtil.getStackTrace(e)); } finally { if (batchMode && conn != null) { try { conn.commit(); log.info("batchMode commit done"); } catch (SQLException ex) { log.error(ex, ex); log.error("SQLException: " + ex.getMessage()); log.error("SQLState: " + ex.getSQLState()); log.error("VendorError: " + ex.getErrorCode()); } } long latencyMillis = System.currentTimeMillis() - currentTimeMillis; int latencySeconds = ((int) (latencyMillis + 500)) / 1000; String logMsg = (isSuccessful ? "Saved" : "Error occurred in saving"); log.info(logMsg + " (" + recordType + "," + cluster + ") " + latencySeconds + " sec. numOfRecords: " + numOfRecords); if (rs != null) { try { rs.close(); } catch (SQLException ex) { log.error(ex, ex); log.error("SQLException: " + ex.getMessage()); log.error("SQLState: " + ex.getSQLState()); log.error("VendorError: " + ex.getErrorCode()); } rs = null; } if (stmt != null) { try { stmt.close(); } catch (SQLException ex) { log.error(ex, ex); log.error("SQLException: " + ex.getMessage()); log.error("SQLState: " + ex.getSQLState()); log.error("VendorError: " + ex.getErrorCode()); } stmt = null; } if (conn != null) { try { conn.close(); } catch (SQLException ex) { log.error(ex, ex); log.error("SQLException: " + ex.getMessage()); log.error("SQLState: " + ex.getSQLState()); log.error("VendorError: " + ex.getErrorCode()); } conn = null; } if (reader != null) { try { reader.close(); } catch (Exception e) { log.warn("Could not close SequenceFile.Reader:", e); } reader = null; } } return true; } public Boolean call() { return run(); } public static void main(String[] args) { try { conf = new ChukwaConfiguration(); fs = FileSystem.get(conf); MetricDataLoader mdl = new MetricDataLoader(conf, fs, args[0]); mdl.run(); } catch (Exception e) { e.printStackTrace(); } } }