Java tutorial
/*************************************************************************** * Copyright 2010 Global Biodiversity Information Facility Secretariat * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * http://www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. ***************************************************************************/ package org.gbif.ipt.service.manage.impl; import org.gbif.dwc.text.Archive; import org.gbif.dwc.text.ArchiveFactory; import org.gbif.dwc.text.ArchiveFile; import org.gbif.dwc.text.UnsupportedArchiveException; import org.gbif.ipt.config.AppConfig; import org.gbif.ipt.config.DataDir; import org.gbif.ipt.model.ExcelFileSource; import org.gbif.ipt.model.FileSource; import org.gbif.ipt.model.Resource; import org.gbif.ipt.model.Source; import org.gbif.ipt.model.SqlSource; import org.gbif.ipt.model.TextFileSource; import org.gbif.ipt.service.AlreadyExistingException; import org.gbif.ipt.service.BaseManager; import org.gbif.ipt.service.ImportException; import org.gbif.ipt.service.InvalidFilenameException; import org.gbif.ipt.service.SourceException; import org.gbif.ipt.service.manage.SourceManager; import org.gbif.utils.file.ClosableIterator; import org.gbif.utils.file.ClosableReportingIterator; import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.sql.Connection; import java.sql.DriverManager; import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.sql.SQLWarning; import java.sql.Statement; import java.util.ArrayList; import java.util.Date; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Set; import java.util.regex.Pattern; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; import com.google.common.collect.Ordering; import com.google.inject.Inject; import org.apache.commons.io.FileUtils; import org.apache.commons.io.FilenameUtils; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; public class SourceManagerImpl extends BaseManager implements SourceManager { private static class ColumnIterator implements ClosableIterator<Object> { private final ClosableIterator<String[]> rows; private final int column; public ColumnIterator(FileSource source, int column) throws IOException { rows = source.rowIterator(); this.column = column; } public void close() { rows.close(); } public boolean hasNext() { return rows.hasNext(); } public Object next() { String[] row = rows.next(); if (row == null || row.length < column) { return null; } return row[column]; } public void remove() { // unsupported } } private class SqlColumnIterator implements ClosableIterator<Object> { private final Connection conn; private final Statement stmt; private final ResultSet rs; private final int column; private boolean hasNext; private final String sourceName; public SqlColumnIterator(SqlSource source, int column) throws SQLException { this(source, column, source.getSql()); } public SqlColumnIterator(SqlSource source, int column, int limit) throws SQLException { this(source, column, source.getSqlLimited(limit)); } /** * SqlColumnIterator constructor * * @param source of the sql data * @param column to inspect, zero based numbering as used in the dwc archives * @param sql statement to query in the sql source */ private SqlColumnIterator(SqlSource source, int column, String sql) throws SQLException { this.conn = getDbConnection(source); this.stmt = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); source.getRdbms().enableLargeResultSet(this.stmt); this.column = column + 1; this.rs = stmt.executeQuery(sql); this.hasNext = rs.next(); sourceName = source.getName(); } public void close() { if (rs != null) { try { rs.close(); stmt.close(); conn.close(); } catch (SQLException e) { log.error("Cant close iterator for sql source " + sourceName, e); } } } public boolean hasNext() { return hasNext; } public Object next() { String val = null; if (hasNext) { try { // forward rs cursor hasNext = rs.next(); val = rs.getString(column); } catch (SQLException e2) { hasNext = false; } } return val; } public void remove() { // unsupported } } private class SqlRowIterator implements ClosableReportingIterator<String[]> { private final Connection conn; private final Statement stmt; private final ResultSet rs; private boolean hasNext; private final String sourceName; private final int rowSize; private boolean rowError; private String errorMessage; private Exception exception; SqlRowIterator(SqlSource source) throws SQLException { this.conn = getDbConnection(source); this.stmt = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); source.getRdbms().enableLargeResultSet(this.stmt); this.rs = stmt.executeQuery(source.getSql()); this.rowSize = rs.getMetaData().getColumnCount(); this.hasNext = rs.next(); sourceName = source.getName(); this.rowError = false; } public void close() { if (rs != null) { try { rs.close(); stmt.close(); conn.close(); } catch (SQLException e) { log.error("Cant close iterator for sql source " + sourceName, e); } } } public boolean hasNext() { return hasNext; } public String[] next() { String[] val = new String[rowSize]; if (hasNext) { try { resetReportingIterator(); int gotTo = 0; // field reached in row try { for (int i = 1; i <= rowSize; i++) { val[i - 1] = rs.getString(i); gotTo = i; } } catch (SQLException exOnRow) { log.debug("Exception caught reading row: " + exOnRow.getMessage(), exOnRow); rowError = true; exception = exOnRow; // construct error message showing exception and problem row StringBuilder msg = new StringBuilder(); msg.append("Exception caught reading row: "); msg.append(exOnRow.getMessage()); msg.append("\n"); msg.append("Row: "); for (int i = 0; i < gotTo; i++) { msg.append("[").append(val[i]).append("]"); } errorMessage = msg.toString(); } finally { // forward rs cursor hasNext = rs.next(); } } catch (SQLException e2) { // Exception on advancing cursor, assume no more rows. log.debug("Exception caught advancing cursor: " + e2.getMessage(), e2); hasNext = false; exception = e2; errorMessage = e2.getMessage(); } } return val; } /** * Reset all iterator reporting parameters. */ private void resetReportingIterator() { rowError = false; exception = null; errorMessage = null; } public void remove() { // unsupported } public boolean hasRowError() { return rowError; } public String getErrorMessage() { return errorMessage; } public Exception getException() { return exception; } } // default fetch sized used in SQL statements private static final int FETCH_SIZE = 10; // the maximum time in seconds that a driver will wait while attempting to connect to a database private static final int CONNECTION_TIMEOUT_SECS = 5; private static final String ACCEPTED_FILE_NAMES = "[\\w.\\-\\s\\)\\(]+"; // Allowed characters in file names: alpha-numeric characters, plus ".", "-", "_", ")", "(", and " " private Pattern acceptedPattern = Pattern.compile(ACCEPTED_FILE_NAMES); @Inject public SourceManagerImpl(AppConfig cfg, DataDir dataDir) { super(cfg, dataDir); } public static void copyArchiveFileProperties(ArchiveFile from, TextFileSource to) { to.setEncoding(from.getEncoding()); to.setFieldsEnclosedBy(from.getFieldsEnclosedBy() == null ? null : from.getFieldsEnclosedBy().toString()); to.setFieldsTerminatedBy(from.getFieldsTerminatedBy()); to.setIgnoreHeaderLines(from.getIgnoreHeaderLines()); to.setDateFormat(from.getDateFormat()); } /** * Tests if the the file name is composed of alpha-numeric characters, plus ".", "-", "_", ")", "(", and " ". * * @param fileName the file name * * @return <tt> if accepted, <tt>false</tt> otherwise */ @VisibleForTesting protected boolean acceptableFileName(String fileName) { boolean matches = acceptedPattern.matcher(fileName).matches(); if (!matches) { log.error("File name contains illegal characters: " + fileName); } return matches; } private ExcelFileSource addExcelFile() throws ImportException { ExcelFileSource src = new ExcelFileSource(); // TODO: encoding, header rows, date format? src.setSheetIdx(0); return src; } private TextFileSource addTextFile(File file) throws ImportException { TextFileSource src = new TextFileSource(); try { // anaylze individual files using the dwca reader Archive arch = ArchiveFactory.openArchive(file); copyArchiveFileProperties(arch.getCore(), src); } catch (IOException e) { log.warn(e.getMessage()); throw new ImportException(e); } catch (UnsupportedArchiveException e) { // fine, cant read it with dwca library, but might still be a valid file for manual setup log.warn(e.getMessage()); } return src; } public FileSource add(Resource resource, File file, String fileName) throws ImportException, InvalidFilenameException { log.debug("ADDING SOURCE " + fileName + " FROM " + file.getAbsolutePath()); if (acceptableFileName(fileName)) { FileSource src; String suffix = FilenameUtils.getExtension(fileName); if (suffix != null && (suffix.equalsIgnoreCase("xls") || suffix.equalsIgnoreCase("xlsx"))) { src = addExcelFile(); } else { src = addTextFile(file); } src.setName(fileName); src.setResource(resource); try { // copy file File ddFile = dataDir.sourceFile(resource, src); try { FileUtils.copyFile(file, ddFile); } catch (IOException e1) { throw new ImportException(e1); } src.setFile(ddFile); src.setLastModified(new Date()); // add to resource, allow overwriting existing ones // if the file is uploaded not for the first time resource.addSource(src, true); } catch (AlreadyExistingException e) { throw new ImportException(e); } // analyze file analyze(src); return src; } else { throw new InvalidFilenameException("Filename contains illegal characters"); } } public String analyze(Source source) { if (source instanceof SqlSource) { return analyze((SqlSource) source); } else { return analyze((FileSource) source); } } private String analyze(SqlSource ss) { String problem = null; Connection con = null; Statement stmt = null; ResultSet rs = null; try { con = getDbConnection(ss); // test sql if (StringUtils.trimToNull(ss.getSql()) != null) { stmt = con.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); stmt.setFetchSize(FETCH_SIZE); rs = stmt.executeQuery(ss.getSqlLimited(FETCH_SIZE)); // get number of columns ResultSetMetaData meta = rs.getMetaData(); ss.setColumns(meta.getColumnCount()); ss.setReadable(true); } } catch (SQLException e) { log.warn("Cant read sql source " + ss, e); problem = e.getMessage(); ss.setReadable(false); } finally { // close result set, statement, and connection in that order if (rs != null) { try { rs.close(); } catch (SQLException e) { log.error("ResultSet could not be closed: " + e.getMessage(), e); } } if (stmt != null) { try { stmt.close(); } catch (SQLException e) { log.error("Statement could not be closed: " + e.getMessage(), e); } } if (con != null) { try { con.close(); } catch (SQLException e) { log.error("Connection could not be closed: " + e.getMessage(), e); } } } return problem; } private String analyze(FileSource src) { BufferedWriter logWriter = null; File logFile = dataDir.sourceLogFile(src.getResource().getShortname(), src.getName()); try { FileUtils.deleteQuietly(logFile); Set<Integer> emptyLines; try { emptyLines = src.analyze(); } catch (IOException e) { return e.getMessage(); } logWriter = new BufferedWriter(new FileWriter(logFile)); logWriter.write("Log for source name:" + src.getName() + " from resource: " + src.getResource().getShortname() + "\n"); if (!emptyLines.isEmpty()) { for (Integer i : Ordering.natural().sortedCopy(emptyLines)) { logWriter.write("Line: " + i + " [EMPTY LINE]\n"); } } else { logWriter.write("No rows were skipped in this source"); } logWriter.flush(); } catch (IOException e) { log.warn("Cant write source log file " + logFile.getAbsolutePath(), e); } finally { if (logWriter != null) { IOUtils.closeQuietly(logWriter); } } return null; } /* * (non-Javadoc) * @see org.gbif.ipt.service.manage.SourceManager#columns(org.gbif.ipt.model.SourceBase) */ public List<String> columns(Source source) { if (source == null) { return Lists.newArrayList(); } if (source instanceof SqlSource) { return columns((SqlSource) source); } return ((FileSource) source).columns(); } private List<String> columns(SqlSource source) { List<String> columns = new ArrayList<String>(); Connection con = null; Statement stmt = null; ResultSet rs = null; try { con = getDbConnection(source); if (con != null) { // test sql stmt = con.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); stmt.setFetchSize(1); rs = stmt.executeQuery(source.getSqlLimited(1)); // get column metadata ResultSetMetaData meta = rs.getMetaData(); int idx = 1; int max = meta.getColumnCount(); while (idx <= max) { columns.add(meta.getColumnLabel(idx)); idx++; } } else { String msg = "Can't read sql source, the connection couldn't be created with the current parameters"; columns.add(msg); log.warn(msg + " " + source); } } catch (SQLException e) { log.warn("Cant read sql source " + source, e); } finally { // close result set, statement, and connection in that order if (rs != null) { try { rs.close(); } catch (SQLException e) { log.error("ResultSet could not be closed: " + e.getMessage(), e); } } if (stmt != null) { try { stmt.close(); } catch (SQLException e) { log.error("Statement could not be closed: " + e.getMessage(), e); } } if (con != null) { try { con.close(); } catch (SQLException e) { log.error("Connection could not be closed: " + e.getMessage(), e); } } } return columns; } /* * (non-Javadoc) * @see org.gbif.ipt.service.manage.MappingConfigManager#delete(org.gbif.ipt.model.SourceBase.TextFileSource) */ public boolean delete(Resource resource, Source source) { if (source == null) { return false; } resource.deleteSource(source); if (source instanceof TextFileSource) { // also delete source data file TextFileSource fs = (TextFileSource) source; fs.getFile().delete(); } if (source instanceof ExcelFileSource) { // also delete source data file if no further source uses it ExcelFileSource es = (ExcelFileSource) source; boolean del = true; for (Source src : resource.getSources()) { if (!src.equals(es) && src.isExcelSource() && ((ExcelFileSource) src).getFile().equals(es.getFile())) { // another excel source using the same file, dont delete del = false; break; } } if (del) { es.getFile().delete(); } } return true; } private Connection getDbConnection(SqlSource source) throws SQLException { Connection conn = null; // try to connect to db via simple JDBC if (source.getHost() != null && source.getJdbcUrl() != null && source.getJdbcDriver() != null) { try { DriverManager.setLoginTimeout(CONNECTION_TIMEOUT_SECS); Class.forName(source.getJdbcDriver()); conn = DriverManager.getConnection(source.getJdbcUrl(), source.getUsername(), source.getPassword()); // If a SQLWarning object is available, log its // warning(s). There may be multiple warnings chained. SQLWarning warn = conn.getWarnings(); while (warn != null) { log.warn("SQLWarning: state=" + warn.getSQLState() + ", message=" + warn.getMessage() + ", vendor=" + warn.getErrorCode()); warn = warn.getNextWarning(); } } catch (java.lang.ClassNotFoundException e) { String msg = String.format( "Couldnt load JDBC driver to create new external datasource connection with JDBC Class=%s and URL=%s. Error: %s", source.getJdbcDriver(), source.getJdbcUrl(), e.getMessage()); log.warn(msg, e); throw new SQLException(msg, e); } catch (Exception e) { String msg = String.format( "Couldnt create new external datasource connection with JDBC Class=%s, URL=%s, user=%s. Error: %s", source.getJdbcDriver(), source.getJdbcUrl(), source.getUsername(), e.getMessage()); log.warn(msg, e); throw new SQLException(msg); } } return conn; } /* * (non-Javadoc) * @see org.gbif.ipt.service.manage.SourceManager#inspectColumn(org.gbif.ipt.model.SourceBase, int, int) */ public Set<String> inspectColumn(Source source, int column, int maxValues, int maxRows) throws SourceException { Set<String> values = new HashSet<String>(); ClosableIterator<Object> iter = null; try { iter = iterSourceColumn(source, column, maxRows); // get distinct values while (iter.hasNext() && (maxValues < 1 || values.size() < maxValues)) { Object obj = iter.next(); if (obj != null) { String val = obj.toString(); values.add(val); } } } catch (Exception e) { log.error(e); throw new SourceException("Error reading source " + source.getName() + ": " + e.getMessage()); } finally { if (iter != null) { iter.close(); } } return values; } /** * @param limit limit for the recordset passed into the sql. If negative or zero no limit will be used */ private ClosableIterator<Object> iterSourceColumn(Source source, int column, int limit) throws Exception { if (source instanceof SqlSource) { SqlSource src = (SqlSource) source; if (limit > 0) { return new SqlColumnIterator(src, column, limit); } else { return new SqlColumnIterator(src, column); } } else { return new ColumnIterator((FileSource) source, column); } } /* * (non-Javadoc) * @see org.gbif.ipt.service.manage.SourceManager#peek(org.gbif.ipt.model.SourceBase) */ public List<String[]> peek(Source source, int rows) { if (source instanceof SqlSource) { return peek((SqlSource) source, rows); } // both excel and file implement FileSource return peek((FileSource) source, rows); } private List<String[]> peek(FileSource source, int rows) { List<String[]> preview = Lists.newArrayList(); if (source != null) { try { Iterator<String[]> iter = source.rowIterator(); while (rows > 0 && iter.hasNext()) { rows--; preview.add(iter.next()); } } catch (Exception e) { log.warn("Cant peek into source " + source.getName(), e); } } return preview; } private List<String[]> peek(SqlSource source, int rows) { List<String[]> preview = new ArrayList<String[]>(); Connection con = null; Statement stmt = null; ResultSet rs = null; try { con = getDbConnection(source); if (con != null) { // test sql stmt = con.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); stmt.setFetchSize(rows); rs = stmt.executeQuery(source.getSqlLimited(rows + 1)); // loop over result while (rows > 0 && rs.next()) { rows--; String[] row = new String[source.getColumns()]; for (int idx = 0; idx < source.getColumns(); idx++) { row[idx] = rs.getString(idx + 1); } preview.add(row); } } } catch (SQLException e) { log.warn("Cant read sql source " + source, e); } finally { // close result set, statement, and connection in that order if (rs != null) { try { rs.close(); } catch (SQLException e) { log.error("ResultSet could not be closed: " + e.getMessage(), e); } } if (stmt != null) { try { stmt.close(); } catch (SQLException e) { log.error("Statement could not be closed: " + e.getMessage(), e); } } if (con != null) { try { con.close(); } catch (SQLException e) { log.error("Connection could not be closed: " + e.getMessage(), e); } } } return preview; } public ClosableReportingIterator<String[]> rowIterator(Source source) throws SourceException { if (source == null) { return null; } try { if (source instanceof SqlSource) { return new SqlRowIterator((SqlSource) source); } // both excel and file implement FileSource return ((FileSource) source).rowIterator(); } catch (Exception e) { log.error("Exception while reading source " + source.getName(), e); throw new SourceException("Cant build iterator for source " + source.getName() + " :" + e.getMessage()); } } }