Java tutorial
/* * Copyright (c) 2015. OSR Data Corporation * * Licensed under the Apache License, Version 2.0 (the "License") * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.osrdata.etltoolbox.fileloader; import au.com.bytecode.opencsv.CSVParser; import au.com.bytecode.opencsv.CSVReader; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.springframework.dao.EmptyResultDataAccessException; import org.springframework.jdbc.core.JdbcTemplate; import org.springframework.jdbc.datasource.DataSourceTransactionManager; import org.springframework.transaction.TransactionStatus; import org.springframework.transaction.support.TransactionCallbackWithoutResult; import org.springframework.transaction.support.TransactionTemplate; import javax.sql.DataSource; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.math.BigDecimal; import java.sql.SQLException; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * This class both encapsulates the source-to-target mappings and the processing of a source file to a database * targetTable. */ public class FileSpecification { private static final Logger log = LogManager.getLogger(FileSpecification.class); private Pattern sourcePattern; private Integer dateGroup; private String dateFormat; private Integer typeGroup; private Integer sourceId; private File sourceFile; private List<String> targetColumns = null; private String targetTable; private String targetSql; private DataSource targetDs = null; private JdbcTemplate targetTemplate = null; private DataSource auditDs = null; private int parserLine = 0; private char parserSeparator = CSVParser.DEFAULT_SEPARATOR; private char parserQuotechar = CSVParser.DEFAULT_QUOTE_CHARACTER; private char parserEscape = CSVParser.DEFAULT_ESCAPE_CHARACTER; private boolean parserStrictQuotes = CSVParser.DEFAULT_STRICT_QUOTES; private boolean parserIgnoreLeadingWhiteSpace = CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE; private int batchThreshold; private boolean replaceExisting; private Date etlDate; private String etlType; private BigDecimal recordId; private int numColumns = 0; private long numRecords = 0l; private ArrayList<Object[]> records = new ArrayList<Object[]>(); private long startTime = 0l; private long trace = 0l; /** * Constructs and initializes this object using source to target specifications contained in specification file. * @param spec file name of JSON formatted file that contains source to target specifications */ public FileSpecification(Map<String, Object> spec, DataSource auditDs, DataSource targetDs, int batchThreshold, boolean replaceExisting, long trace) { String stringProperty = (String) spec.get("sourcePattern"); sourcePattern = Pattern.compile(stringProperty, Pattern.CASE_INSENSITIVE); if (spec.containsKey("parserLine")) { parserLine = (Integer) spec.get("parserLine"); } if (spec.containsKey("parserSeparator")) { stringProperty = (String) spec.get("parserSeparator"); parserSeparator = stringProperty.charAt(0); } if (spec.containsKey("parserQuotechar")) { stringProperty = (String) spec.get("parserQuotechar"); parserQuotechar = stringProperty.charAt(0); } if (spec.containsKey("parserEscape")) { stringProperty = (String) spec.get("parserEscape"); parserEscape = stringProperty.charAt(0); } dateGroup = (Integer) spec.get("dateGroup"); dateFormat = (String) spec.get("dateFormat"); typeGroup = (Integer) spec.get("typeGroup"); sourceId = (Integer) spec.get("sourceId"); targetTable = (String) spec.get("targetTable"); targetColumns = (List<String>) spec.get("targetColumns"); this.auditDs = auditDs; this.targetDs = targetDs; this.batchThreshold = batchThreshold; this.replaceExisting = replaceExisting; this.trace = trace; StringBuffer sb = new StringBuffer(); sb.append("insert into ").append(targetTable).append(" ("); boolean firstColumn = true; for (int i = 0; i < targetColumns.size(); i++) { // Columns defined as empty string indicate that they should be skipped if (!targetColumns.get(i).equals("")) { if (firstColumn) { sb.append(targetColumns.get(i)); firstColumn = false; } else { sb.append(", ").append(targetColumns.get(i)); } } } if (sourceId != null) { sb.append(", source_id, file_id, record_id)"); } else { sb.append(")"); } sb.append(" values ("); firstColumn = true; for (int i = 0; i < targetColumns.size(); i++) { // Columns defined as empty string indicate that they should be skipped if (!targetColumns.get(i).equals("")) { if (firstColumn) { sb.append("?"); firstColumn = false; } else { sb.append(", ?"); } numColumns++; } } // If source ID is not defined, then do not include source ID, file ID, and record ID fields. if (sourceId != null) { sb.append(", ?, ?, ?)"); numColumns += 3; } else { sb.append(")"); } targetSql = sb.toString(); } /** * Determines if specified source file's name matches sourcePattern defined for this source to target specification. * @param sourceFile source file to match against * @return true, if file's name matches source sourcePattern; otherwise, false */ public boolean match(File sourceFile) { Matcher matcher = sourcePattern.matcher(sourceFile.getName()); return (matcher.matches()); } /** * Loads specified file into target targetTable. This operation transactional and will rollback any database operations if * there are any errors processing the data. * * @param sourceFile source file to be loaded * @throws IOException on error reading file * @throws ParseException on error parsing fields from file */ public void load(final File sourceFile) throws IOException, ParseException { this.sourceFile = sourceFile; Matcher matcher = sourcePattern.matcher(sourceFile.getName()); etlDate = new Date(); etlType = "I"; if (matcher.find()) { if (dateGroup != null && dateGroup.intValue() <= matcher.groupCount()) { etlDate = new SimpleDateFormat(dateFormat).parse(matcher.group(dateGroup.intValue())); } if (typeGroup != null && typeGroup.intValue() <= matcher.groupCount()) { etlType = matcher.group(typeGroup.intValue()).substring(0, 1).toUpperCase(); } } recordId = new BigDecimal(new SimpleDateFormat("yyyyMMdd").format(etlDate) + "0000000000"); DataSourceTransactionManager txManager = new DataSourceTransactionManager(targetDs); TransactionTemplate txTemplate = new TransactionTemplate(txManager); targetTemplate = new JdbcTemplate(targetDs); log.info("Processing source file " + sourceFile.getName()); numRecords = 0l; try { txTemplate.execute(new TransactionCallbackWithoutResult() { public void doInTransactionWithoutResult(TransactionStatus status) { try { boolean loadFlag = false; Integer fileId = selectAuditFile(); if (fileId != null && replaceExisting) { deleteExisting(fileId); updateAuditFile(fileId); loadFlag = true; } else if (fileId == null) { fileId = insertAuditFile(); loadFlag = true; } if (loadFlag) { CSVReader reader = new CSVReader(new FileReader(sourceFile), parserSeparator, parserQuotechar, parserEscape, parserLine, parserStrictQuotes, parserIgnoreLeadingWhiteSpace); String[] values; startTime = System.currentTimeMillis(); while ((values = reader.readNext()) != null) { add(values, fileId); numRecords++; if (trace > 0l && numRecords % trace == 0l) { log.info("\tProcessed " + getCount(numRecords) + " records in " + getDuration() + " (" + getRecordsPerSecond() + " rps)"); } } reader.close(); insertTarget(); } else { log.info("\tSkipping previously loaded file" + sourceFile.getName()); } } catch (RuntimeException e) { throw e; } catch (Throwable e) { log.error("\tError at record " + numRecords + " in " + sourceFile.getName()); throw new RuntimeException(e); } } }); } catch (RuntimeException e) { log.error("\tAn exception occurred while processing record " + numRecords + " in " + sourceFile.getName() + ". All transactions for this file have been rolled back.", e); } log.info("\tCompleted processing of " + getCount(numRecords) + " records in " + getDuration() + " (" + getRecordsPerSecond() + " rps)"); } /** * Inserts record for source file into the audit_file table. */ private Integer insertAuditFile() throws SQLException { JdbcTemplate template = new JdbcTemplate(auditDs); StringBuilder sql = new StringBuilder(); Integer fileId = null; int result = 0; String auditUrl = auditDs.getConnection().getMetaData().getURL(); sql.append("insert into audit_file "); if (auditUrl.indexOf(":sqlserver:") != -1) { sql.append("values (next value for seq_audit, ?, ?, ?, ?, ?, ?)"); } else if (auditUrl.indexOf(":oracle:") != -1) { sql.append("values (seq_audit.nextval, ?, ?, ?, ?, ?, ?)"); } else if (auditUrl.indexOf(":postgresql:") != -1) { sql.append("values (nextval('seq_audit'), ?, ?, ?, ?, ?, ?)"); } else { // If database type is unknown, attempt to insert record letting database set file_id with trigger or identity value. sql.append( "(source_id, file_name, table_name, etl_type, etl_date, processed_flag) values (?, ?, ?, ?, ?, ?)"); } result = template.update(sql.toString(), new Object[] { sourceId, sourceFile.getName(), targetTable, etlType, etlDate, "N" }); if (result > 0) { fileId = selectAuditFile(); } log.debug("\tInsert into audit_file returned " + result + ", fileId " + fileId); return fileId; } /* * Selects file_id of record from audit_file table that matches criteria of file to be loaded, if record exists. */ private Integer selectAuditFile() { JdbcTemplate template = new JdbcTemplate(auditDs); Integer fileId; try { fileId = template.queryForObject( "select file_id from audit_file where source_id = ? and file_name = ? and table_name = ? and etl_type = ? and etl_date = ?", new Object[] { sourceId, sourceFile.getName(), targetTable, etlType, etlDate }, Integer.class); } catch (EmptyResultDataAccessException e) { fileId = null; } log.debug("\tSelect audit_file returned " + fileId); return fileId; } /* * Updates existing audit_file record to reset its processed_flag to "N". */ private void updateAuditFile(Integer fileId) { JdbcTemplate template = new JdbcTemplate(auditDs); int result = template.update("update audit_file set processed_flag = ? where file_id = ?", new Object[] { "N", fileId }); log.debug("\tUpdate audit_file returned " + result); } /** * Deletes existing records from target targetTable with file ID. If source ID is not defined then deletes all * records from target targetTable. */ private void deleteExisting(Integer fileId) { JdbcTemplate template = new JdbcTemplate(targetDs); int count; StringBuilder sql = new StringBuilder(); sql.append("delete from ").append(targetTable); if (sourceId != null) { sql.append(" where file_id = ?"); count = template.update(sql.toString(), new Object[] { fileId }); } else { count = template.update(sql.toString()); } log.info("\tDeleted " + getCount(count) + " existing records from " + targetTable); } /** * Adds record to batch insertTarget cache. */ private void add(String[] values, Integer fileId) { Object[] record = new Object[numColumns]; int j = 0; for (int i = 0; i < targetColumns.size(); i++) { if (!targetColumns.get(i).equals("")) { if (i < values.length) { record[j] = values[i]; } else { record[j] = new String(); } j++; } } if (sourceId != null) { record[numColumns - 3] = sourceId; record[numColumns - 2] = fileId; recordId = recordId.add(new BigDecimal(1)); record[numColumns - 1] = recordId; } records.add(record); if (records.size() >= batchThreshold) { insertTarget(); } } /** * Inserts cached records into database targetTable using batch update. */ private void insertTarget() { if (records.size() > 0) { targetTemplate.batchUpdate(targetSql, records); log.debug("\tInserted " + records.size() + " records into " + targetTable); records.clear(); } } /** * Gets formatted duration of current load. * @return formatted duration */ private String getDuration() { long duration = System.currentTimeMillis() - startTime; long milliseconds = duration % 1000; long seconds = (duration / 1000) % 60; long minutes = (duration / (1000 * 60)) % 60; long hours = (duration / (1000 * 60 * 60)) % 24; return String.format("%02d:%02d:%02d.%03d", hours, minutes, seconds, milliseconds); } /** * Gets formatted count. * @param count value to format * @return formatted value. */ private String getCount(long count) { return String.format("%,d", count); } /** * Gets formatted records per second value. * @return */ private String getRecordsPerSecond() { long duration = System.currentTimeMillis() - startTime; double recordsPerSecond = (double) numRecords / (duration / 1000.0d); return String.format("%.2f", recordsPerSecond); } }