Java tutorial: MySQLOutputFormat (Splout SQL Hadoop library)
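This example walks through MySQLOutputFormat, the output format the Splout SQL Hadoop library uses to materialize MySQL stores. For each partition it starts an embedded MySQL daemon on a free port, creates the tables, bulk-inserts tuples through cached PreparedStatements with periodic batch commits, and on close stops the daemon and zips the generated database files into a "partition.db" archive. Inline comments marked as illustrations, plus the usage sketch at the end, are additions to the original source.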
package com.splout.db.engine;

/*
 * #%L
 * Splout SQL Hadoop library
 * %%
 * Copyright (C) 2012 - 2013 Datasalt Systems S.L.
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import com.datasalt.pangool.io.ITuple;
import com.datasalt.pangool.io.Schema.Field;
import com.datasalt.pangool.io.Schema.Field.Type;
import com.splout.db.common.CompressorUtil;
import com.splout.db.common.PortUtils;
import com.splout.db.common.PortUtils.PortLock;
import com.splout.db.engine.EmbeddedMySQL.EmbeddedMySQLConfig;
import com.splout.db.hadoop.TableSpec;
import com.splout.db.hadoop.engine.SploutSQLOutputFormat;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.FileFilterUtils;
import org.apache.commons.io.filefilter.WildcardFileFilter;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.Statement;
import java.util.HashMap;
import java.util.Map;

@SuppressWarnings("serial")
public class MySQLOutputFormat extends SploutSQLOutputFormat implements Serializable {

  public static Log LOG = LogFactory.getLog(MySQLOutputFormat.class);

  // Per-field Pangool properties
  public static String STRING_FIELD_SIZE_PANGOOL_FIELD_PROP = "com.splout.db.engine.MySQLOutputFormat.string.field.size";
  public static String AUTO_TRIM_STRING_PANGOOL_FIELD_PROP = "com.splout.db.engine.MySQLOutputFormat.auto.trim.string";

  // Global Hadoop Configuration properties
  public static String GLOBAL_ENGINE_CONF_PROP = "com.splout.db.engine.MySQLOutputFormat.engine";
  public static String GLOBAL_CHARSET_CONF_PROP = "com.splout.db.engine.MySQLOutputFormat.charset";
  public static String GLOBAL_MEMORY_AVAILABLE_FOR_INDEXING = "com.splout.db.engine.MySQLOutputFormat.memory.indexing";
  public static String GLOBAL_STRING_FIELD_SIZE = "com.splout.db.engine.MySQLOutputFormat.string.field.size";
  public static String GLOBAL_AUTO_TRIM_STRING = "com.splout.db.engine.MySQLOutputFormat.auto.trim.string";

  public static String GENERATED_DB_NAME = "splout";

  // Keep track of all opened Mysqlds so we can kill them in any case
  private Map<Integer, EmbeddedMySQL> mySQLs = new HashMap<Integer, EmbeddedMySQL>();

  protected Integer globalStringFieldSize;
  protected Boolean globalAutoTrim = null;

  public MySQLOutputFormat(Integer batchSize, TableSpec... dbSpec) throws SploutSQLOutputFormatException {
    super(batchSize, dbSpec);
  }

  @Override
  public String getCreateTable(TableSpec tableSpec) throws SploutSQLOutputFormatException {
    loadGlobalConf();
    String engine = getConf().get(GLOBAL_ENGINE_CONF_PROP, "MyIsam");
    String charset = getConf().get(GLOBAL_CHARSET_CONF_PROP, "UTF8");

    String createTable = "CREATE TABLE " + tableSpec.getSchema().getName() + " (";
    for (Field field : tableSpec.getSchema().getFields()) {
      int fieldSize = fixedSizeStringField(field);
      if (field.getName().equals(PARTITION_TUPLE_FIELD)) {
        continue;
      }
      createTable += "`" + field.getName() + "` ";
      switch (field.getType()) {
        case INT:
          createTable += "INTEGER, ";
          break;
        case LONG:
          // BIGINT is MySQL's 64-bit integer type. A plain "LONG" column would be
          // interpreted by MySQL as a synonym for MEDIUMTEXT.
          createTable += "BIGINT, ";
          break;
        case DOUBLE:
          createTable += "DOUBLE, ";
          break;
        case FLOAT:
          createTable += "FLOAT, ";
          break;
        case STRING:
          if (fieldSize > -1) {
            createTable += "VARCHAR(" + fieldSize + "), ";
          } else {
            createTable += "TEXT, ";
          }
          break;
        case BOOLEAN:
          createTable += "BOOLEAN, ";
          break;
        default:
          throw new SploutSQLOutputFormatException("Unsupported field type: " + field.getType());
      }
    }
    // Remove the trailing ", " before closing the column list
    createTable = createTable.substring(0, createTable.length() - 2);
    return createTable += ") ENGINE=" + engine + " DEFAULT CHARSET=" + charset;
  }
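  // Illustration (not in the original source): for a hypothetical Pangool schema
  // named "customer" with fields (id: INT, name: STRING), no fixed string size
  // configured and the default engine/charset, getCreateTable() above returns:
  //
  //   CREATE TABLE customer (`id` INTEGER, `name` TEXT) ENGINE=MyIsam DEFAULT CHARSET=UTF8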
  // Map of prepared statements per Schema and per Partition
  private Map<Integer, Map<String, PreparedStatement>> stCache = new HashMap<Integer, Map<String, PreparedStatement>>();
  private Map<Integer, Connection> connCache = new HashMap<Integer, Connection>();

  private long records = 0;

  // This method is called once per partition
  public void initPartition(int partition, Path local) throws IOException {
    Path mysqlDb = new Path(local.getParent(), partition + "");
    LOG.info("Initializing SQL connection [" + partition + "]");
    try {
      PortLock portLock = PortUtils.getNextAvailablePort(EmbeddedMySQLConfig.DEFAULT_PORT);

      EmbeddedMySQL mySQL = null;
      EmbeddedMySQLConfig config = null;

      HashMap<String, Object> customConfig = new HashMap<String, Object>();
      // Reserve memory for indexing. The most important parameters are
      // myisam_sort_buffer_size and key_buffer_size: the available memory is
      // split 90/10 between them (with the 100 MB default, roughly 90 MB and 10 MB).
      // See http://dev.mysql.com/doc/refman/5.0/en/server-system-variables.html
      long totalMem = getConf().getLong(GLOBAL_MEMORY_AVAILABLE_FOR_INDEXING, 100 * 1024 * 1024);
      double shareForSortBuffer = 0.9;
      customConfig.put("myisam_sort_buffer_size", (long) (shareForSortBuffer * totalMem));
      customConfig.put("key_buffer_size", (long) ((1 - shareForSortBuffer) * totalMem));
      // Effectively unlimited temporary sort file size
      customConfig.put("myisam_max_sort_file_size", Long.MAX_VALUE);

      try {
        File mysqlDir = new File(mysqlDb.toString());
        LOG.info("Going to instantiate a MySQLD in: " + mysqlDir + ", port [" + portLock.getPort()
            + "] (partition: " + partition + ")");

        config = new EmbeddedMySQLConfig(portLock.getPort(), EmbeddedMySQLConfig.DEFAULT_USER,
            EmbeddedMySQLConfig.DEFAULT_PASS, mysqlDir, customConfig);
        mySQL = new EmbeddedMySQL(config);
        mySQL.start(true);
      } finally {
        // Release the port lock whether or not the daemon started
        portLock.release();
      }

      mySQLs.put(partition, mySQL);

      // MySQL is successfully started at this point, or an Exception would have been thrown.
      Class.forName(EmbeddedMySQL.DRIVER);
      Connection conn = DriverManager.getConnection(config.getLocalJDBCConnection(GENERATED_DB_NAME),
          config.getUser(), config.getPass());
      conn.setAutoCommit(false);
      connCache.put(partition, conn);
      Statement st = conn.createStatement();

      // Init transaction
      for (String sql : getPreSQL()) {
        LOG.info("Executing: " + sql);
        st.execute(sql);
      }
      st.execute("BEGIN");
      st.close();

      Map<String, PreparedStatement> stMap = new HashMap<String, PreparedStatement>();
      stCache.put(partition, stMap);
    } catch (Exception e) {
      throw new IOException(e);
    }
  }
  /**
   * Loads global variable configuration.
   */
  protected void loadGlobalConf() {
    Configuration conf = getConf();
    if (conf.get(GLOBAL_AUTO_TRIM_STRING) != null) {
      globalAutoTrim = conf.getBoolean(GLOBAL_AUTO_TRIM_STRING, true);
    }
    if (conf.get(GLOBAL_STRING_FIELD_SIZE) != null) {
      globalStringFieldSize = conf.getInt(GLOBAL_STRING_FIELD_SIZE, 255);
    }
  }

  protected boolean fixedSizedStringField(Field f) {
    return fixedSizeStringField(f) != -1;
  }

  // Returns the fixed size for a string field, or -1 if the field has none.
  // The global setting, when present, takes precedence over the per-field property.
  protected int fixedSizeStringField(Field f) {
    if (globalStringFieldSize != null) {
      return globalStringFieldSize;
    } else if (f.getProp(STRING_FIELD_SIZE_PANGOOL_FIELD_PROP) != null) {
      return Integer.parseInt(f.getProp(STRING_FIELD_SIZE_PANGOOL_FIELD_PROP));
    }
    return -1;
  }

  protected boolean autoTrim(Field f) {
    if (globalAutoTrim != null) {
      return globalAutoTrim;
    } else if (f.getProp(AUTO_TRIM_STRING_PANGOOL_FIELD_PROP) != null) {
      // Boolean.parseBoolean avoids the deprecated new Boolean(String) constructor
      return Boolean.parseBoolean(f.getProp(AUTO_TRIM_STRING_PANGOOL_FIELD_PROP));
    }
    return false;
  }
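  // Illustration (not in the original source): the two per-field Pangool
  // properties drive both the DDL and the write path. A string field carrying
  // STRING_FIELD_SIZE_PANGOOL_FIELD_PROP = "32" is created as VARCHAR(32) by
  // getCreateTable(), and if it also carries AUTO_TRIM_STRING_PANGOOL_FIELD_PROP
  // = "true", write() below truncates longer values to 32 characters before the
  // insert. The global Configuration properties, when set, override both
  // per-field settings.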
  @Override
  public void write(ITuple tuple) throws IOException, InterruptedException {
    int partition = (Integer) tuple.get(PARTITION_TUPLE_FIELD);

    try {
      /*
       * Key performance trick: Cache PreparedStatements when possible. We will have one
       * PreparedStatement per each different Tuple Schema (table).
       */
      Map<String, PreparedStatement> stMap = stCache.get(partition);

      PreparedStatement pS = stMap.get(tuple.getSchema().getName());
      if (pS == null) {
        Connection conn = connCache.get(partition);

        // Create a PreparedStatement according to the received Tuple
        String preparedStatement = "INSERT INTO " + tuple.getSchema().getName() + " VALUES (";
        // NOTE: tuple.getSchema().getFields().size() - 1 : quick way of skipping the "_partition" field here
        for (int i = 0; i < tuple.getSchema().getFields().size() - 1; i++) {
          preparedStatement += "?, ";
        }
        preparedStatement = preparedStatement.substring(0, preparedStatement.length() - 2) + ");";

        pS = conn.prepareStatement(preparedStatement);
        stMap.put(tuple.getSchema().getName(), pS);
      }

      int count = 1, tupleCount = 0;
      for (Field field : tuple.getSchema().getFields()) {
        if (field.getName().equals(PARTITION_TUPLE_FIELD)) {
          tupleCount++;
          continue;
        }

        if (field.getType().equals(Type.STRING)) {
          boolean autoTrim = autoTrim(field);
          int fieldSize = fixedSizeStringField(field);
          String str = tuple.getString(tupleCount);
          // Optionally truncate strings that exceed the fixed VARCHAR size
          if (fieldSize > -1 && autoTrim && str != null && str.length() > fieldSize) {
            str = str.substring(0, fieldSize);
          }
          pS.setObject(count, str);
        } else {
          pS.setObject(count, tuple.get(tupleCount));
        }
        count++;
        tupleCount++;
      }

      pS.execute();

      records++;
      if (records == getBatchSize()) {
        // Commit the current batch and open a new transaction
        Connection conn = connCache.get(partition);
        Statement st = conn.createStatement();
        st.execute("COMMIT");
        st.execute("BEGIN");
        st.close();
        records = 0;
      }
    } catch (Exception e) {
      throw new IOException(e);
    }
  }

  @Override
  public void close() throws IOException, InterruptedException {
    try {
      for (Map.Entry<Integer, Connection> entry : connCache.entrySet()) {
        LOG.info("Closing SQL connection [" + entry.getKey() + "]");
        Connection conn = entry.getValue();
        Statement st = conn.createStatement();
        st.execute("COMMIT");
        if (getPostSQL() != null) {
          LOG.info("Executing end SQL statements.");
          for (String sql : getPostSQL()) {
            LOG.info("Executing: " + sql);
            st.execute(sql);
          }
        }
        st.close();
        conn.close();

        // Close MySQL before copying files (so mysql.sock disappears!)
        EmbeddedMySQL msql = mySQLs.get(entry.getKey());
        msql.stop();

        File resident = msql.getConfig().getResidentFolder();
        File zipDest = new File(resident.getParentFile(), entry.getKey() + ".db");

        // Create a "partition.db" zip with the needed files.
        CompressorUtil.createZip(
            resident,
            zipDest,
            new WildcardFileFilter(new String[] { "ib*", "*.frm", "*.MYD", "*.MYI", "db.opt", "*.ibd" }),
            FileFilterUtils.or(FileFilterUtils.nameFileFilter("data"), FileFilterUtils.nameFileFilter("splout")));

        // Delete all files except the generated zip "partition.db"
        FileUtils.deleteDirectory(new File(resident, "bin"));
        FileUtils.deleteDirectory(new File(resident, "data"));
        FileUtils.deleteDirectory(new File(resident, "share"));
      }
    } catch (Exception e) {
      throw new IOException(e);
    } finally {
      // In any case, make sure every embedded MySQL daemon is stopped
      for (Map.Entry<Integer, EmbeddedMySQL> entry : mySQLs.entrySet()) {
        entry.getValue().stop();
      }
    }
  }
}
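To wire this into a job, only the constructor and the public constants above are needed. The following sketch shows the Hadoop Configuration side; it is illustrative, not from the original source: the batch size of 1000 is an arbitrary example value, and the TableSpec (normally built from a Pangool Schema plus partitioning info) is left as a placeholder.

    import org.apache.hadoop.conf.Configuration;

    import com.splout.db.engine.MySQLOutputFormat;
    import com.splout.db.hadoop.TableSpec;

    public class MySQLOutputFormatUsage {

      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Storage engine and charset for all generated tables (defaults: "MyIsam", "UTF8")
        conf.set(MySQLOutputFormat.GLOBAL_ENGINE_CONF_PROP, "MyISAM");
        conf.set(MySQLOutputFormat.GLOBAL_CHARSET_CONF_PROP, "UTF8");

        // Memory reserved for index creation; initPartition() splits it 90/10 between
        // myisam_sort_buffer_size and key_buffer_size (default: 100 MB)
        conf.setLong(MySQLOutputFormat.GLOBAL_MEMORY_AVAILABLE_FOR_INDEXING, 256L * 1024 * 1024);

        // Create every string column as VARCHAR(255) and silently trim longer values
        conf.setInt(MySQLOutputFormat.GLOBAL_STRING_FIELD_SIZE, 255);
        conf.setBoolean(MySQLOutputFormat.GLOBAL_AUTO_TRIM_STRING, true);

        // Placeholder: in real code, build the TableSpec from your Pangool Schema
        TableSpec tableSpec = null;

        // Commit every 1000 inserted records
        MySQLOutputFormat outputFormat = new MySQLOutputFormat(1000, tableSpec);

        // The Configuration above must be the one the Hadoop job hands to the output
        // format, since getCreateTable() and initPartition() read it through getConf().
      }
    }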