Java tutorial
/* * Copyright [2015] [Stefan Prll] * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * Copyright [2015] [Stefan Prll] * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * Copyright [2015] [Stefan Prll] * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package CSVTools; import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringEscapeUtils; import org.supercsv.io.CsvListReader; import org.supercsv.io.CsvListWriter; import org.supercsv.io.ICsvListReader; import org.supercsv.prefs.CsvPreference; import javax.sql.rowset.CachedRowSet; import java.io.*; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.util.*; import java.util.logging.Logger; /** * Provides methods and tools for working with CSV files. */ public class CsvToolsApi { private Logger logger; private static MessageDigest crypto; private String directory; public CsvToolsApi() { this.logger = Logger.getLogger(this.getClass().getName()); try { this.crypto = MessageDigest.getInstance("SHA-1"); } catch (NoSuchAlgorithmException e) { // TODO Auto-generated catch block e.printStackTrace(); } crypto.reset(); } /** * Return coma seperated list of header names * * @throws java.io.IOException */ public String getHeadersOfCSV(String inFile) { String[] header = null; CsvListReader reader = null; try { reader = new CsvListReader(new FileReader(inFile), CsvPreference.STANDARD_PREFERENCE); header = reader.getHeader(false); System.out.println("Reader: " + header.length); // Read headers from file and remove spaces header = this.replaceSpaceWithDash(reader.getHeader(true)); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } String headerNames = "("; for (String columnName : header) { System.out.println("current header: " + columnName); headerNames += columnName.toUpperCase() + " VARCHAR NOT NULL,"; } if (headerNames.lastIndexOf(',') == headerNames.length() - 1) { int index = headerNames.lastIndexOf(','); if (index != -1) { headerNames = headerNames.substring(0, index); } } headerNames += ")"; System.out.println("HEADER NAMES " + headerNames); return headerNames; } /** * Return coma seperated list of header names * * @throws java.io.IOException */ public String[] getArrayOfHeadersCSV(String inFile) { String[] header = null; CsvListReader reader; try { reader = new CsvListReader(new FileReader(inFile), CsvPreference.STANDARD_PREFERENCE); header = this.replaceSpaceWithDash(reader.getHeader(true)); } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } // Read headers from file and remove spaces catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } // this.logger.info("There are " + header.length + " headers"); // this.printArray(header); return header; } public void printArray(String[] array) { for (int i = 0; i < array.length; i++) { this.logger.info(array[i]); } } /** * Get the headers as List * @param inFile * @return */ public List<String> getListOfHeadersCSV(String inFile) { List<String> headersList = new ArrayList<>(); String[] headerArray = this.getArrayOfHeadersCSV(inFile); for (String columnHeader : headerArray) { headersList.add(columnHeader); //this.logger.info("Added " + columnHeader + " size is now " + headersList.size()); } return headersList; } /** * Remove all spaces from header names by removing leading spaces and * replace other spaces with underscore * * @param headers * @return */ private String[] replaceSpaceWithDash(String[] headers) { String[] headersWithNoSpaces = headers; //System.out.println("replace spaces in " + headers.length + " headers "); for (int i = 0; i < headers.length; i++) { //TODO check the regular expression for table headers if (headers[i] != "") { // System.out.println("Header: " + headers[i]); // String regex = "[-\\w._+%]"; // headersWithNoSpaces[i] = headers[i].replaceAll(regex, "_"); headersWithNoSpaces[i] = headers[i].replaceAll("\\s", "_"); //TODO dummy test //headersWithNoSpaces[i] = headers[i]; } } return headersWithNoSpaces; } /** * Remove all spaces from header names by removing leading spaces and * replace other spaces with underscore * * @param headers * @return */ private List<String> replaceSpaceWithDash(List<String> headers) { List<String> headersWithNoSpaces = headers; for (int i = 0; i < headers.size(); i++) { headersWithNoSpaces.set(i, headers.get(i).replaceAll("^\\s+", "").replace(" ", "_")); headersWithNoSpaces.set(i, headers.get(i).replace("(", "_").replace(")", "_")); headersWithNoSpaces.set(i, headers.get(i).replace("/", "_").replace("\\", "_")); } return headersWithNoSpaces; } /** * replace Blanks with dashes * * @param withSpaceCharacter * @return */ public String replaceSpaceWithDash(String withSpaceCharacter) { String noSpaceString = withSpaceCharacter.replaceAll("^\\s+", "").replace(" ", "_"); noSpaceString = noSpaceString.replace("(", "_").replace(")", "_"); noSpaceString = noSpaceString.replace("/", "_").replace("\\", "_"); return noSpaceString; } /** * Escape a string * * @param withQuoteCharacter * @return */ public String escapeQuotes(String withQuoteCharacter) { String output = withQuoteCharacter; if (withQuoteCharacter.contains("\"")) { output = StringEscapeUtils.escapeJava(withQuoteCharacter); output = output.replaceAll("'", "''"); } return output; } private static void appendMetadataToCSV(String inFile, String outFile) throws Exception { CsvListReader reader = new CsvListReader(new FileReader(inFile), CsvPreference.TAB_PREFERENCE); CsvListWriter writer = new CsvListWriter(new FileWriter(outFile), CsvPreference.TAB_PREFERENCE); final String[] header = reader.getHeader(true); // Create the new header array including sequence number and hash String[] newHeader = new String[reader.length() + 2]; // Prepend the new column header for the sequence number newHeader[0] = "SequenceNumber"; // for (int i = 1; i < newHeader.length - 1; i++) { newHeader[i] = header[i - 1]; } newHeader[newHeader.length - 1] = "Hash"; writer.writeHeader(newHeader); List<String> columns; while ((columns = reader.read()) != null) { // Add new columns String appendedColumns = convertStringListToAppendedString(columns); String hash = calculateSHA1HashFromString(appendedColumns); // System.out.println(appendedColumns); // System.out.println(hash); columns.add(0, Integer.toString(reader.getRowNumber() - 1)); columns.add(hash); writer.write(columns); } reader.close(); writer.close(); } private static void createSeperateMetadataFile(String inFile, String outFile) throws Exception { deleteOutputFile(outFile + "Metadata.csv"); CsvListReader reader = new CsvListReader(new FileReader(inFile), CsvPreference.TAB_PREFERENCE); CsvListWriter writer = new CsvListWriter(new FileWriter(outFile + "Metadata.csv"), CsvPreference.TAB_PREFERENCE); final String[] header = reader.getHeader(true); // Create the new header array including sequence number and hash String[] newHeader = new String[2]; // Prepend the new column header for the sequence number newHeader[0] = "SequenceNumber"; newHeader[1] = "Hash"; writer.writeHeader(newHeader); List<String> columns; while ((columns = reader.read()) != null) { List<String> sequenceAndHashList = new ArrayList(); // Calculate the hash from one row String appendedColumns = convertStringListToAppendedString(columns); String hash = calculateSHA1HashFromString(appendedColumns); // System.out.println(appendedColumns); // System.out.println(hash); sequenceAndHashList.add(0, Integer.toString(reader.getRowNumber() - 1)); sequenceAndHashList.add(hash); writer.write(sequenceAndHashList); } reader.close(); writer.close(); } /** * Append all elements of a list in one string * * @param columns * @return */ public static String convertStringListToAppendedString(List<String> columns) { String allColumnsAppended = ""; for (int i = 0; i < columns.size(); i++) { allColumnsAppended += columns.get(i); // System.out.println("pos " +i + " val: " +columns.get(i) ); } return allColumnsAppended; } /** * Calculate a SHA1-Hash from a string * @param inputString * @return * @throws NoSuchAlgorithmException */ public static String calculateSHA1HashFromString(String inputString) throws NoSuchAlgorithmException { try { crypto.update(inputString.getBytes("utf8")); } catch (UnsupportedEncodingException e) { // TODO Auto-generated catch block e.printStackTrace(); } String hash = DigestUtils.sha1Hex(crypto.digest()); return hash; } public static void deleteOutputFile(String fileName) { File file = new File(fileName); if (file.exists()) { file.delete(); // you might want to check if delete was successfull } } private static void hashFile(String fileName) throws NoSuchAlgorithmException, IOException { MessageDigest md = MessageDigest.getInstance("SHA1"); FileInputStream fis; fis = new FileInputStream(fileName); byte[] dataBytes = new byte[1024]; int nread = 0; while ((nread = fis.read(dataBytes)) != -1) { md.update(dataBytes, 0, nread); } ; byte[] mdbytes = md.digest(); // convert the byte to hex format StringBuffer sb = new StringBuffer(""); for (int i = 0; i < mdbytes.length; i++) { sb.append(Integer.toString((mdbytes[i] & 0xff) + 0x100, 16).substring(1)); } System.out.println("Digest(in hex format):: " + sb.toString()); } /** * Analyse the CSV file. if it has headers, read them and store the * metadata. Then check for the longest entry per column and store the * length in the metadata. * * @param hasHeader * @param fileName * @return * @throws java.io.IOException */ // TODO public Column[] analyseColumns(boolean hasHeader, String fileName) throws IOException { CsvListReader reader = new CsvListReader(new FileReader(fileName), CsvPreference.STANDARD_PREFERENCE); this.logger.info("Analyzing columns..."); String[] headerStringArray = reader.getHeader(true); List<String> rowAsTokens; List<String> header = new ArrayList<String>(reader.read()); header = this.replaceSpaceWithDash(header); header = this.replaceReservedKeyWords(header); int columnCount = header.size(); Column[] columnsMetadata = new Column[columnCount]; for (int i = 0; i < columnCount; i++) { String currentHeader = null; if (hasHeader) { columnsMetadata[i] = new Column(header.get(i), 0); } else { columnsMetadata[i] = new Column("Column_" + Integer.toString(i), 0); } } // Read the CSV as List of Maps where each Map represents row data List<Map<String, String>> rows = new ArrayList<Map<String, String>>(); Map<String, String> row = null; // read each row as a map <headername, value> while ((rowAsTokens = reader.read()) != null) { // Create hash map of each row row = new HashMap<String, String>(); int contentLength = 0; String currentToken = ""; for (int i = 0; i < header.size(); i++) { currentToken = rowAsTokens.get(i); if (currentToken != null) { contentLength = rowAsTokens.get(i).length() + 1; } else { contentLength = 1; } row.put(header.get(i), rowAsTokens.get(i)); columnsMetadata[i].setMaxContentLength(contentLength); // columnsMetadata[i].setDataTypeString(dataTypeDetectorAPI.getDataType(currentToken)); } // add Row map to list of rows rows.add(row); } return columnsMetadata; } // TODO umschreiben in die db importier funktion public List<String> replaceReservedKeyWords(List<String> header) { List<String> reservedKeyWords = new ArrayList<String>(); reservedKeyWords.add("release"); for (String currentHeader : header) { if (reservedKeyWords.contains(currentHeader.toLowerCase().trim())) { this.logger.info("Reserved keyword found"); int index = header.indexOf(currentHeader); // replace the header with backticks header.set(index, ("`" + currentHeader + "`")); } } return header; } public String replaceReservedKeyWords(String input) { List<String> reservedKeyWords = new ArrayList<String>(); reservedKeyWords.add("release"); reservedKeyWords.add("year"); if (reservedKeyWords.contains(input.toLowerCase().trim())) { this.logger.info("Reserved keyword found"); input = ("`" + input + "`"); } return input; } /** * Get List of all rows * * @param fileName * @return * @throws Exception */ public List<String> readWithCsvListReaderAsStrings(String fileName) throws Exception { List<String> columns; List<String[]> csvRow; CsvListReader reader = new CsvListReader(new FileReader(fileName), CsvPreference.TAB_PREFERENCE); ICsvListReader listReader = null; try { final String[] header = reader.getHeader(true); // Create the new header array including sequence number and hash String[] newHeader = new String[reader.length() + 2]; // Prepend the new column header for the sequence number newHeader[0] = "SequenceNumber"; // for (int i = 1; i < newHeader.length - 1; i++) { newHeader[i] = header[i - 1]; } newHeader[newHeader.length - 1] = "Hash"; while ((columns = reader.read()) != null) { // Add new columns String appendedColumns = convertStringListToAppendedString(columns); String hash = calculateSHA1HashFromString(appendedColumns); //System.out.println(appendedColumns); // System.out.println(hash); columns.add(0, Integer.toString(reader.getRowNumber() - 1)); columns.add(hash); } reader.close(); } finally { if (listReader != null) { listReader.close(); } } return columns; } public void writeResultSetIntoCSVFile(CachedRowSet resultSet, String path) { PrintWriter csvWriter = null; try { csvWriter = new PrintWriter(new File(path)); ResultSetMetaData meta = resultSet.getMetaData(); int numberOfColumns = meta.getColumnCount(); String dataHeaders = "\"" + meta.getColumnName(1) + "\""; for (int i = 2; i < numberOfColumns + 1; i++) { dataHeaders += ",\"" + meta.getColumnName(i) + "\""; } csvWriter.println(dataHeaders); resultSet.beforeFirst(); while (resultSet.next()) { String row = "\"" + resultSet.getString(1) + "\""; for (int i = 2; i < numberOfColumns + 1; i++) { row += ",\"" + resultSet.getString(i) + "\""; } csvWriter.println(row); } csvWriter.close(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (SQLException e) { e.printStackTrace(); } } public void createCSVDirectory(String path) { File file = new File(path); boolean b = false; /* * exists() method tests whether the file or directory denoted by this * abstract pathname exists or not accordingly it will return TRUE / * FALSE. */ if (!file.exists()) { /* * mkdirs() method creates the directory mentioned by this abstract * pathname including any necessary but nonexistent parent * directories. * * Accordingly it will return TRUE or FALSE if directory created * successfully or not. If this operation fails it may have * succeeded in creating some of the necessary parent directories. */ b = file.mkdirs(); } if (b) System.out.println("Directory successfully created"); else System.out.println("Failed to create directory"); } public void deleteCSVDirectory(String path) { File file = new File(path); try { FileUtils.deleteDirectory(file); } catch (IOException e) { e.printStackTrace(); } } public int getamounfOfColumnsFromCsvFile(String fileName) { String[] headers = getArrayOfHeadersCSV(fileName); int csvColumnsAmount = headers.length; return csvColumnsAmount; } public String getDirectory() { return directory; } public void setDirectory(String directory) { this.directory = directory; } }