Java tutorial
/** * Copyright (c) 2016 Vijay Vijayaram * * Permission is hereby granted, free of charge, to any person obtaining a copy of this software * and associated documentation files (the "Software"), to deal in the Software without restriction, * including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all copies or substantial * portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT * NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH * THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ package com.kumarvv.setl.core; import com.kumarvv.setl.model.Def; import com.kumarvv.setl.model.Row; import com.kumarvv.setl.model.Status; import com.kumarvv.setl.utils.CsvParser; import com.kumarvv.setl.utils.RowSetUtil; import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.StringUtils; import org.pmw.tinylog.Logger; import javax.sql.rowset.JdbcRowSet; import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.BlockingQueue; import java.util.function.Consumer; public class Extractor implements Runnable { final BlockingQueue<Row> queue; final Status status; final Def def; final Map<String, Integer> fromColumns; final Consumer<Boolean> doneCallback; RowSetUtil rowSetUtil; /** * constructor * * @param queue * @param def * @param status * @param doneCallback */ public Extractor(BlockingQueue<Row> queue, Def def, Status status, Consumer<Boolean> doneCallback) { this.queue = queue; this.def = def; this.status = status; this.fromColumns = new HashMap<>(); this.doneCallback = doneCallback; this.rowSetUtil = RowSetUtil.getInstance(); } /** * thread runner */ @Override public void run() { boolean result = true; Logger.info("Extractor is starting..."); try { result = extract(); } finally { Logger.info("Extractor is completed. result={}", result); doneCallback.accept(result); } } /** * extracts data from three type of sources (in priority order) * - sql * - data elements * - csv file * * @return */ boolean extract() { status.reset(); if (def == null || def.getExtract() == null) { return false; } boolean result = false; if (StringUtils.isNotEmpty(def.getExtract().getSql())) { Logger.info("extracting from sql: {}", def.getExtract().getSql()); result = extractDataFromSql(); } else if (CollectionUtils.isNotEmpty(def.getExtract().getData())) { Logger.info("extracting from data"); result = extractDataFromData(); } else if (def.getExtract().getCsv() != null) { Logger.info("extracting from CSV: {}", def.getExtract().getCsv().getFilePath()); result = extractDataFromCsv(); } else { Logger.warn("invalid source sql/csv configuration. skipping ETL."); return false; } return result; } /** * extract data from data element in json * * @return */ boolean extractDataFromData() { if (def.getExtract() == null || def.getExtract().getData() == null) { Logger.info("extract config is missing. skipping extraction"); return true; } def.getExtract().getData().stream().forEach(row -> { addToQueue(new Row(fromColumns, row.getValues())); }); return true; } /** * extract data from csv file * * @return */ boolean extractDataFromCsv() { if (def.getExtract() == null || def.getExtract().getCsv() == null) { Logger.info("extract config is missing. skipping extraction"); return true; } final CsvParser parser = CsvParser.getInstance(def.getExtract().getCsv()); final List<Map<String, Object>> data = parser.parse(); data.stream().forEach(row -> { addToQueue(new Row(fromColumns, row)); }); return true; } /** * extract data using sql definition * * @return */ boolean extractDataFromSql() { if (def.getExtract() == null || StringUtils.isEmpty(def.getExtract().getSql())) { Logger.info("extract config is missing. skipping extraction"); return true; } String sql = def.getExtract().getSql(); try (JdbcRowSet jrs = rowSetUtil.getRowSet(def.getFromDS())) { jrs.setCommand(sql); jrs.execute(); jrs.setFetchDirection(ResultSet.FETCH_FORWARD); jrs.setFetchSize(100); ResultSetMetaData meta = jrs.getMetaData(); initFromColumns(meta); parseData(jrs, meta); return true; } catch (Exception e) { Logger.error("error in extraction: {}", e.getMessage()); Logger.debug(e); return false; } } /** * initializes from columns * * @param meta * @throws SQLException */ void initFromColumns(ResultSetMetaData meta) throws SQLException { fromColumns.putAll(rowSetUtil.getMetaColumns(meta)); } /** * parse data from jdbc row set * * @param jrs * @param meta * @return * @throws SQLException */ long parseData(JdbcRowSet jrs, ResultSetMetaData meta) throws SQLException { if (jrs == null || meta == null) { return 0; } long rowCount = 0; while (jrs.next()) { rowCount++; parseDataRow(jrs, meta); if (!isWithinLimit(rowCount)) { break; } } return rowCount; } /** * parses data row to {@link Row} object * * @param jrs * @param meta * @return {@link Row} * @throws SQLException */ Row parseDataRow(JdbcRowSet jrs, ResultSetMetaData meta) throws SQLException { if (jrs == null || meta == null) { return null; } int colCount = meta.getColumnCount(); Map<String, Object> row = new HashMap<>(); for (int c = 1; c <= colCount; c++) { row.put(meta.getColumnName(c).toLowerCase(), jrs.getObject(c)); } Row ro = new Row(fromColumns, row); addToQueue(ro); return ro; } /** * adds the row to blocking queue and holds the queue any loader threads * * @param row */ void addToQueue(Row row) { try { queue.put(row); status.incrementFound(); } catch (InterruptedException ie) { } } /** * checks if rowcount is within the specified limit in json config * * @param rowCount * @return true | false */ boolean isWithinLimit(long rowCount) { return def.getExtract().getLimitRows() == 0 || rowCount <= def.getExtract().getLimitRows(); } }