Java tutorial: PgDataset, a PostgreSQL-backed implementation of the RowStore Dataset interface
/*
 * Copyright (c) 2011-2015 MetaSolutions AB <info@metasolutions.se>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.entrystore.rowstore.store.impl;

import com.opencsv.CSVReader;
import org.entrystore.rowstore.etl.EtlStatus;
import org.entrystore.rowstore.store.Dataset;
import org.entrystore.rowstore.store.RowStore;
import org.json.JSONException;
import org.json.JSONObject;
import org.postgresql.core.BaseConnection;
import org.postgresql.util.PGobject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * A PostgreSQL-specific implementation of the Dataset interface.
 *
 * @author Hannes Ebner
 * @see Dataset
 */
public class PgDataset implements Dataset {

    private static final Logger log = LoggerFactory.getLogger(PgDataset.class);

    private String id;

    private int status;

    private Date created;

    private String dataTable;

    private RowStore rowstore;

    private Map<String, Integer> columnSize = new HashMap<>();

    int maxSizeForIndex = 256;

    protected PgDataset(RowStore rowstore, String id) {
        if (rowstore == null) {
            throw new IllegalArgumentException("RowStore must not be null");
        }
        if (id == null) {
            throw new IllegalArgumentException("Dataset ID must not be null");
        }
        this.rowstore = rowstore;
        this.id = id;
        initFromDb();
    }

    protected PgDataset(RowStore rowstore, String id, int status, Date created, String dataTable) {
        if (rowstore == null) {
            throw new IllegalArgumentException("RowStore must not be null");
        }
        if (id == null) {
            throw new IllegalArgumentException("Dataset ID must not be null");
        }
        this.rowstore = rowstore;
        this.id = id;
        this.status = status;
        this.created = created;
        this.dataTable = dataTable;
    }

    /**
     * @see Dataset#getId()
     */
    @Override
    public String getId() {
        return id;
    }

    /**
     * @see Dataset#getStatus()
     */
    @Override
    public int getStatus() {
        // we reload the info from the DB because the status may have changed
        initFromDb();
        return status;
    }

    /**
     * @see Dataset#setStatus(int)
     */
    @Override
    public void setStatus(int status) {
        Connection conn = null;
        PreparedStatement stmt = null;
        try {
            conn = rowstore.getConnection();
            conn.setAutoCommit(true);
            stmt = conn.prepareStatement("UPDATE " + PgDatasets.TABLE_NAME + " SET status = ? WHERE id = ?");
            stmt.setInt(1, status);
            PGobject uuid = new PGobject();
            uuid.setType("uuid");
            uuid.setValue(id);
            stmt.setObject(2, uuid);
            log.info("Setting status of " + getId() + " to " + EtlStatus.toString(status) + " (" + status + ")");
            log.debug("Executing: " + stmt);
            stmt.executeUpdate();
        } catch (SQLException e) {
            SqlExceptionLogUtil.error(log, e);
        } finally {
            if (stmt != null) {
                try {
                    stmt.close();
                } catch (SQLException e) {
                    SqlExceptionLogUtil.error(log, e);
                }
            }
            if (conn != null) {
                try {
                    conn.close();
                } catch (SQLException e) {
                    SqlExceptionLogUtil.error(log, e);
                }
            }
        }
        this.status = status;
    }
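
    /*
     * Note on the schema assumed by setStatus() and initFromDb(): the dataset
     * metadata table (PgDatasets.TABLE_NAME, created elsewhere in the store)
     * is expected to carry at least the columns read and written here.
     * Roughly, as a sketch and not the authoritative DDL:
     *
     *   CREATE TABLE <PgDatasets.TABLE_NAME> (
     *       id         UUID PRIMARY KEY,
     *       status     INTEGER,
     *       created    TIMESTAMP,
     *       data_table TEXT
     *   );
     */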
    /**
     * @see Dataset#getCreationDate()
     */
    @Override
    public Date getCreationDate() {
        return created;
    }

    private String getDataTable() {
        return dataTable;
    }

    /**
     * @see Dataset#populate(File)
     */
    @Override
    public boolean populate(File csvFile) throws IOException {
        if (csvFile == null) {
            throw new IllegalArgumentException("Argument must not be null");
        }
        String dataTable = getDataTable();
        if (dataTable == null) {
            log.error("Dataset has no data table assigned");
            return false;
        }

        setStatus(EtlStatus.PROCESSING);

        Connection conn = null;
        PreparedStatement stmt = null;
        CSVReader cr = null;
        try {
            conn = rowstore.getConnection();
            cr = new CSVReader(new FileReader(csvFile), ',', '"');
            int lineCount = 0;
            String[] labels = null;
            String[] line;
            conn.setAutoCommit(false);
            stmt = conn.prepareStatement("INSERT INTO " + dataTable + " (rownr, data) VALUES (?, ?)");
            while ((line = cr.readNext()) != null) {
                if (lineCount == 0) {
                    labels = line;
                } else {
                    JSONObject jsonLine = null;
                    try {
                        jsonLine = csvLineToJsonObject(line, labels);
                    } catch (Exception e) {
                        log.error(e.getMessage());
                        log.info("Rolling back transaction");
                        conn.rollback();
                        setStatus(EtlStatus.ERROR);
                        return false;
                    }
                    stmt.setInt(1, lineCount);
                    PGobject jsonb = new PGobject();
                    jsonb.setType("jsonb");
                    jsonb.setValue(jsonLine.toString());
                    stmt.setObject(2, jsonb);
                    log.debug("Adding to batch: " + stmt);
                    stmt.addBatch();
                    // we execute the batch every 100th line
                    if ((lineCount % 100) == 0) {
                        log.debug("Executing: " + stmt);
                        stmt.executeBatch();
                    }
                }
                lineCount++;
            }
            // in case there are some inserts left to be sent (i.e.
            // the batch size above was smaller than 100 when the loop ended)
            log.debug("Executing: " + stmt);
            stmt.executeBatch();

            // we create an index over the data
            createIndex(conn, dataTable, labels);

            // we commit the transaction and free the resources of the statement
            conn.commit();
            setStatus(EtlStatus.AVAILABLE);
            return true;
        } catch (SQLException e) {
            SqlExceptionLogUtil.error(log, e);
            if (conn != null) {
                try {
                    log.info("Rolling back transaction");
                    conn.rollback();
                } catch (SQLException e1) {
                    SqlExceptionLogUtil.error(log, e1);
                }
            }
            setStatus(EtlStatus.ERROR);
            return false;
        } finally {
            if (cr != null) {
                try {
                    cr.close();
                } catch (IOException e) {
                    log.error(e.getMessage());
                }
            }
            if (stmt != null) {
                try {
                    stmt.close();
                } catch (SQLException e) {
                    SqlExceptionLogUtil.error(log, e);
                }
            }
            if (conn != null) {
                try {
                    conn.close();
                } catch (SQLException e) {
                    SqlExceptionLogUtil.error(log, e);
                }
            }
        }
    }
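
    /*
     * Illustration of what populate() stores, using made-up data. Given a CSV
     * file
     *
     *   name,city
     *   Alice,Stockholm
     *
     * the first row provides the column labels, and the second row is
     * converted by csvLineToJsonObject() into the JSONB value
     * {"name":"Alice","city":"Stockholm"}, which is inserted as
     * (rownr = 1, data = ...) into the dataset's data table.
     */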
    private void createIndex(Connection conn, String table, String[] fields) throws SQLException {
        for (int i = 0; i < fields.length; i++) {
            // We do not try to index fields that are too large, as we would get an error from PostgreSQL
            // TODO instead of just skipping the index we could run a fulltext-index on such fields
            Integer fieldSize = columnSize.get(fields[i]);
            if (fieldSize != null && fieldSize > maxSizeForIndex) {
                log.debug("Skipping index creation for field \"" + fields[i] + "\"; the configured max field size is " + maxSizeForIndex + ", but the actual size is " + fieldSize);
                continue;
            }
            // We cannot use prepared statements for CREATE INDEX with parametrized fields:
            // the type to be used with setObject() is not known and setString() does not work.
            // It should be safe to run BaseConnection.escapeString() to avoid SQL injection.
            String sql = new StringBuilder("CREATE INDEX ON ").append(table).append(" (").append("(data->>'")
                    .append(((BaseConnection) conn).escapeString(fields[i])).append("'))").toString();
            log.debug("Executing: " + sql);
            try (Statement indexStmt = conn.createStatement()) {
                indexStmt.execute(sql);
            }
        }

        /* The index below may be used for more advanced JSON-specific indexing
           and querying, but currently we don't need this functionality:

           String sql = new StringBuilder("CREATE INDEX ON ").append(table).append(" USING GIN(data jsonb_path_ops)").toString();
           log.debug("Executing: " + sql);
           conn.createStatement().execute(sql);
         */
    }

    /**
     * @see Dataset#query(Map)
     */
    @Override
    public List<JSONObject> query(Map<String, String> tuples) {
        Connection conn = null;
        PreparedStatement stmt = null;
        ResultSet rs = null;
        boolean regexp = rowstore.getConfig().hasRegExpQuerySupport();
        List<JSONObject> result = new ArrayList<>();
        try {
            conn = rowstore.getConnection();
            StringBuilder queryTemplate = new StringBuilder("SELECT data FROM " + getDataTable());
            if (tuples != null && tuples.size() > 0) {
                for (int i = 0; i < tuples.size(); i++) {
                    if (i == 0) {
                        queryTemplate.append(" WHERE ");
                    } else {
                        queryTemplate.append(" AND ");
                    }
                    if (regexp) {
                        // we match using ~ to enable regular expressions
                        queryTemplate.append("data->>? ~ ?");
                    } else {
                        queryTemplate.append("data->>? = ?");
                    }
                }
            }
            stmt = conn.prepareStatement(queryTemplate.toString());
            if (tuples != null && tuples.size() > 0) {
                Iterator<String> keys = tuples.keySet().iterator();
                int paramPos = 1;
                while (keys.hasNext()) {
                    String key = keys.next();
                    stmt.setString(paramPos, key);
                    stmt.setString(paramPos + 1, tuples.get(key));
                    paramPos += 2;
                }
            }
            log.debug("Executing: " + stmt);
            rs = stmt.executeQuery();
            while (rs.next()) {
                String value = rs.getString("data");
                try {
                    result.add(new JSONObject(value));
                } catch (JSONException e) {
                    log.error(e.getMessage());
                }
            }
        } catch (SQLException e) {
            SqlExceptionLogUtil.error(log, e);
        } finally {
            if (rs != null) {
                try {
                    rs.close();
                } catch (SQLException e) {
                    SqlExceptionLogUtil.error(log, e);
                }
            }
            if (stmt != null) {
                try {
                    stmt.close();
                } catch (SQLException e) {
                    SqlExceptionLogUtil.error(log, e);
                }
            }
            if (conn != null) {
                try {
                    conn.close();
                } catch (SQLException e) {
                    SqlExceptionLogUtil.error(log, e);
                }
            }
        }
        return result;
    }
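
    /*
     * Example of the statement built by query() for the (hypothetical) tuples
     * {"name" -> "Alice", "city" -> "Stockholm"}:
     *
     *   SELECT data FROM <data_table> WHERE data->>? = ? AND data->>? = ?
     *
     * with the parameters bound pairwise to key and value. If regular
     * expression support is enabled in the configuration, "=" is replaced by
     * PostgreSQL's "~" operator.
     */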
    /**
     * @see Dataset#getColumnNames()
     */
    @Override
    public Set<String> getColumnNames() {
        Connection conn = null;
        PreparedStatement stmt = null;
        ResultSet rs = null;
        Set<String> result = new HashSet<>();
        try {
            conn = rowstore.getConnection();
            stmt = conn.prepareStatement("SELECT * FROM " + getDataTable() + " LIMIT 1");
            log.debug("Executing: " + stmt);
            rs = stmt.executeQuery();
            if (rs.next()) {
                String strRow = rs.getString("data");
                try {
                    JSONObject jsonRow = new JSONObject(strRow);
                    Iterator<String> keys = jsonRow.keys();
                    while (keys.hasNext()) {
                        result.add(keys.next());
                    }
                } catch (JSONException e) {
                    log.error(e.getMessage());
                }
            }
        } catch (SQLException e) {
            SqlExceptionLogUtil.error(log, e);
        } finally {
            if (rs != null) {
                try {
                    rs.close();
                } catch (SQLException e) {
                    SqlExceptionLogUtil.error(log, e);
                }
            }
            if (stmt != null) {
                try {
                    stmt.close();
                } catch (SQLException e) {
                    SqlExceptionLogUtil.error(log, e);
                }
            }
            if (conn != null) {
                try {
                    conn.close();
                } catch (SQLException e) {
                    SqlExceptionLogUtil.error(log, e);
                }
            }
        }
        return result;
    }
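
    /*
     * The per-dataset data table targeted by populate(), query(),
     * getColumnNames() and getRowCount() is assumed to have roughly this
     * shape (a sketch; the table is created elsewhere in the store):
     *
     *   CREATE TABLE <data_table> (rownr INTEGER, data JSONB);
     */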
    /**
     * Initializes the object by loading all information from the database.
     */
    private void initFromDb() {
        Connection conn = null;
        PreparedStatement stmt = null;
        ResultSet rs = null;
        try {
            conn = rowstore.getConnection();
            stmt = conn.prepareStatement("SELECT * FROM " + PgDatasets.TABLE_NAME + " WHERE id = ?");
            PGobject uuid = new PGobject();
            uuid.setType("uuid");
            uuid.setValue(getId());
            stmt.setObject(1, uuid);
            log.info("Loading dataset " + getId() + " from database");
            log.debug("Executing: " + stmt);
            rs = stmt.executeQuery();
            if (rs.next()) {
                this.status = rs.getInt("status");
                this.created = rs.getTimestamp("created");
                this.dataTable = rs.getString("data_table");
            } else {
                throw new IllegalStateException("Unable to initialize Dataset object from database");
            }
        } catch (SQLException e) {
            SqlExceptionLogUtil.error(log, e);
        } finally {
            if (rs != null) {
                try {
                    rs.close();
                } catch (SQLException e) {
                    SqlExceptionLogUtil.error(log, e);
                }
            }
            if (stmt != null) {
                try {
                    stmt.close();
                } catch (SQLException e) {
                    SqlExceptionLogUtil.error(log, e);
                }
            }
            if (conn != null) {
                try {
                    conn.close();
                } catch (SQLException e) {
                    SqlExceptionLogUtil.error(log, e);
                }
            }
        }
    }

    /**
     * @see Dataset#getRowCount()
     */
    @Override
    public int getRowCount() {
        int result = -1;
        Connection conn = null;
        PreparedStatement stmt = null;
        ResultSet rs = null;
        try {
            conn = rowstore.getConnection();
            stmt = conn.prepareStatement("SELECT COUNT(rownr) AS rowcount FROM " + getDataTable());
            log.debug("Executing: " + stmt);
            rs = stmt.executeQuery();
            while (rs.next()) {
                result = rs.getInt("rowcount");
            }
        } catch (SQLException e) {
            SqlExceptionLogUtil.error(log, e);
        } finally {
            if (rs != null) {
                try {
                    rs.close();
                } catch (SQLException e) {
                    SqlExceptionLogUtil.error(log, e);
                }
            }
            if (stmt != null) {
                try {
                    stmt.close();
                } catch (SQLException e) {
                    SqlExceptionLogUtil.error(log, e);
                }
            }
            if (conn != null) {
                try {
                    conn.close();
                } catch (SQLException e) {
                    SqlExceptionLogUtil.error(log, e);
                }
            }
        }
        return result;
    }

    /**
     * Converts a CSV row to a JSON object.
     *
     * @param line The row consisting of its cells' values.
     * @param labels The column labels.
     * @return Returns a JSON object consisting of key (labels) - value (line/cell values) pairs.
     * @throws JSONException
     */
    private JSONObject csvLineToJsonObject(String[] line, String[] labels) throws JSONException {
        if (line.length > labels.length) {
            throw new IllegalArgumentException("Number of values per row must not exceed the number of labels in the first row of the CSV file");
        }

        JSONObject result = new JSONObject();
        for (int i = 0; i < line.length; i++) {
            result.put(labels[i], line[i]);
            putAndRetainLargestValue(labels[i], line[i].length());
        }

        return result;
    }

    private void putAndRetainLargestValue(String key, int length) {
        Integer existing = columnSize.get(key);
        if (existing == null || (existing < length)) {
            columnSize.put(key, length);
        }
    }

}
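
The following is a minimal usage sketch of the Dataset API implemented above. How a concrete Dataset instance is obtained from the store is not shown in this file, so the example takes one as a parameter; the CSV file and the queried column "city" are made up for illustration.

package org.entrystore.rowstore.store.impl;

import org.entrystore.rowstore.store.Dataset;
import org.json.JSONObject;

import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

public class DatasetUsageExample {

    /**
     * Imports a CSV file into the given dataset and runs a simple query.
     * The caller is responsible for obtaining the Dataset instance.
     */
    static void loadAndQuery(Dataset dataset, File csvFile) throws IOException {
        // populate() imports the CSV and returns false (after setting
        // EtlStatus.ERROR) if the import fails
        if (!dataset.populate(csvFile)) {
            System.err.println("Import failed for dataset " + dataset.getId());
            return;
        }

        System.out.println("Rows: " + dataset.getRowCount());
        System.out.println("Columns: " + dataset.getColumnNames());

        // query for rows whose "city" column equals "Stockholm"; keys must
        // match the labels of the CSV header row
        Map<String, String> tuples = new HashMap<>();
        tuples.put("city", "Stockholm");
        for (JSONObject row : dataset.query(tuples)) {
            System.out.println(row);
        }
    }
}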