Java tutorial
/* * The Kuali Financial System, a comprehensive financial management system for higher education. * * Copyright 2005-2014 The Kuali Foundation * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package org.kuali.kfs.gl.batch; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.UUID; import org.apache.commons.io.FileUtils; import org.kuali.kfs.sys.KFSConstants; import org.kuali.kfs.sys.context.SpringContext; import org.kuali.rice.core.api.config.property.ConfigurationService; /** * This class... */ public class BatchSortUtil { private static org.apache.log4j.Logger LOG = org.apache.log4j.Logger.getLogger(BatchSortUtil.class); private static File tempDir; private static File getTempDirectory() { if (tempDir == null) { tempDir = new File(SpringContext.getBean(ConfigurationService.class) .getPropertyValueAsString(KFSConstants.TEMP_DIRECTORY_KEY)); } return tempDir; } static public void sortTextFileWithFields(String inputFileName, String outputFileName, @SuppressWarnings("rawtypes") Comparator comparator) { // create a directory for the interim files String tempSortDirName = UUID.randomUUID().toString(); File tempSortDir = new File(getTempDirectory(), tempSortDirName); // ensure the directory is empty FileUtils.deleteQuietly(tempSortDir); try { FileUtils.forceMkdir(tempSortDir); } catch (IOException ex) { LOG.fatal("Unable to create temporary sort directory", ex); throw new RuntimeException("Unable to create temporary sort directory", ex); } //LOG.info("Sorting input file " + inputFileName + " into temp directory " + tempSortDir); int numFiles = sortToTempFiles(inputFileName, tempSortDir, comparator); //LOG.info("Merging " + numFiles + " temp files from temp directory into output file " + outputFileName); // now that the sort is complete - merge the sorted files mergeFiles(tempSortDir, numFiles, outputFileName, comparator); File sortedFile = new File(outputFileName); /* if (sortedFile.exists()) { LOG.info("Successfully merged input file " + inputFileName + " to output file " + outputFileName); } */ // remove the temporary sort directory FileUtils.deleteQuietly(tempSortDir); /* LOG.info("Successfully deleted temp directory " + tempSortDir); if (sortedFile.canRead()) { LOG.info("Sorted file " + outputFileName + " is readable upon completion of flat file sorting."); } else { LOG.error("Sorted file " + outputFileName + " can't be read upon completion of flat file sorting."); } */ } static int linesPerFile = 10000; /* Code below derived from code originally written by Sammy Larbi and * downloaded from www.codeodor.com. * * http://www.codeodor.com/index.cfm/2007/5/14/Re-Sorting-really-BIG-files---the-Java-source-code/1208 */ private static int sortToTempFiles(String inputFileName, File tempSortDir, Comparator<String> comparator) { BufferedReader inputFile; try { inputFile = new BufferedReader(new FileReader(inputFileName)); //LOG.info("Successfully opened input file " + inputFileName); } catch (FileNotFoundException ex) { LOG.fatal("Unable to find input file: " + inputFileName, ex); throw new RuntimeException("Unable to find input file: " + inputFileName, ex); } try { String line = ""; ArrayList<String> batchLines = new ArrayList<String>(linesPerFile); int numFiles = 0; while (line != null) { // get 10k rows for (int i = 0; i < linesPerFile; i++) { line = inputFile.readLine(); if (line != null) { batchLines.add(line); } } // sort the rows // batchLines = mergeSort(batchLines, comparator); Collections.sort(batchLines, comparator); // write to disk BufferedWriter bw = new BufferedWriter(new FileWriter(new File(tempSortDir, "chunk_" + numFiles))); for (int i = 0; i < batchLines.size(); i++) { bw.append(batchLines.get(i)).append('\n'); //LOG.info("Writing temp sort file chunk_" + numFiles + " to tempSortDir " + tempSortDir); } bw.close(); //LOG.info("Closed temp sort file chunk_" + numFiles); numFiles++; batchLines.clear(); // empty the array for the next pass } inputFile.close(); //LOG.info("Successfully closed input file " + inputFileName); return numFiles; } catch (Exception ex) { LOG.fatal("Exception processing sort to temp files.", ex); throw new RuntimeException(ex); } } private static void mergeFiles(File tempSortDir, int numFiles, String outputFileName, Comparator<String> comparator) { try { ArrayList<FileReader> mergefr = new ArrayList<FileReader>(numFiles); ArrayList<BufferedReader> mergefbr = new ArrayList<BufferedReader>(numFiles); // temp buffer for writing - contains the minimum record from each file ArrayList<String> fileRows = new ArrayList<String>(numFiles); BufferedWriter bw = new BufferedWriter(new FileWriter(outputFileName)); //LOG.info("Successfully opened output file " + outputFileName); boolean someFileStillHasRows = false; // Iterate over all the files, getting the first line in each file for (int i = 0; i < numFiles; i++) { // open a file reader for each file mergefr.add(new FileReader(new File(tempSortDir, "chunk_" + i))); mergefbr.add(new BufferedReader(mergefr.get(i))); // get the first row String line = mergefbr.get(i).readLine(); if (line != null) { fileRows.add(line); someFileStillHasRows = true; } else { fileRows.add(null); } } while (someFileStillHasRows) { String min = null; int minIndex = 0; // index of the file with the minimum record // init for later compare - assume the first file has the minimum String line = fileRows.get(0); if (line != null) { min = line; minIndex = 0; } else { min = null; minIndex = -1; } // determine the minimum record of the top lines of each file // check which one is min for (int i = 1; i < fileRows.size(); i++) { line = fileRows.get(i); if (line != null) { if (min != null) { if (comparator.compare(line, min) < 0) { minIndex = i; min = line; } } else { min = line; minIndex = i; } } } if (minIndex < 0) { someFileStillHasRows = false; } else { // write to the sorted file bw.append(fileRows.get(minIndex)).append('\n'); // get another row from the file that had the min line = mergefbr.get(minIndex).readLine(); if (line != null) { fileRows.set(minIndex, line); } else { // file is out of rows, set to null so it is ignored fileRows.set(minIndex, null); } } // check if one still has rows for (int i = 0; i < fileRows.size(); i++) { someFileStillHasRows = false; if (fileRows.get(i) != null) { if (minIndex < 0) { throw new RuntimeException( "minIndex < 0 and row found in chunk file " + i + " : " + fileRows.get(i)); } someFileStillHasRows = true; break; } } // check the actual files one more time if (!someFileStillHasRows) { //write the last one not covered above for (int i = 0; i < fileRows.size(); i++) { if (fileRows.get(i) == null) { line = mergefbr.get(i).readLine(); if (line != null) { someFileStillHasRows = true; fileRows.set(i, line); } } } } } // close all the files bw.close(); //LOG.info("Successfully closed output file " + outputFileName); for (BufferedReader br : mergefbr) { br.close(); } for (FileReader fr : mergefr) { fr.close(); } } catch (Exception ex) { LOG.error("Exception merging the sorted files", ex); throw new RuntimeException("Exception merging the sorted files", ex); } } }