org.kuali.ole.gl.batch.BatchSortUtil.java Source code

Java tutorial

Introduction

Here is the source code for org.kuali.ole.gl.batch.BatchSortUtil.java

Source

/*
 * Copyright 2008-2009 The Kuali Foundation
 * 
 * Licensed under the Educational Community License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.opensource.org/licenses/ecl2.php
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.kuali.ole.gl.batch;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.UUID;

import org.apache.commons.io.FileUtils;
import org.kuali.ole.sys.OLEConstants;
import org.kuali.ole.sys.context.SpringContext;
import org.kuali.rice.core.api.config.property.ConfigurationService;

/**
 * Utility for sorting line-oriented text files that may be too large to sort in memory.
 * <p>
 * The input is read in batches of {@link #linesPerFile} lines. Each batch is sorted in memory and
 * written to its own "chunk" file inside a uniquely named scratch directory below the configured
 * temp directory. The sorted chunks are then merged line by line into the output file and the
 * scratch directory is removed.
 * <p>
 * Files are read and written through {@link FileReader} / {@link FileWriter}, i.e. with the JVM's
 * default charset, and every line written is terminated with a single {@code '\n'}.
 */
public class BatchSortUtil {
    private static final org.apache.log4j.Logger LOG = org.apache.log4j.Logger.getLogger(BatchSortUtil.class);

    /** Lazily resolved base directory for scratch files; see {@link #getTempDirectory()}. */
    private static File tempDir;

    /**
     * Returns the base directory under which per-sort scratch directories are created.
     * The location is looked up from the {@link ConfigurationService} on first use and cached.
     *
     * @return the configured temp directory
     */
    private static File getTempDirectory() {
        if (tempDir == null) {
            tempDir = new File(SpringContext.getBean(ConfigurationService.class)
                    .getPropertyValueAsString(OLEConstants.TEMP_DIRECTORY_KEY));
        }
        return tempDir;
    }

    /**
     * Sorts the lines of a text file into a new file using the given comparator.
     * <p>
     * Lines that compare as equal keep their relative input order: each chunk is sorted with
     * {@link Collections#sort(java.util.List, Comparator)} and the merge prefers the
     * lower-numbered (earlier) chunk on ties.
     *
     * @param inputFileName path of the file whose lines are to be sorted
     * @param outputFileName path of the file to write the sorted lines to
     * @param comparator ordering applied to whole lines; it is invoked with {@code String} arguments
     * @throws RuntimeException if the scratch directory cannot be created, the input file cannot be
     *         found, or any failure occurs while sorting or merging
     */
    static public void sortTextFileWithFields(String inputFileName, String outputFileName,
            @SuppressWarnings("rawtypes") Comparator comparator) {
        // create a directory for the interim files
        String tempSortDirName = UUID.randomUUID().toString();
        File tempSortDir = new File(getTempDirectory(), tempSortDirName);
        // ensure the directory is empty
        FileUtils.deleteQuietly(tempSortDir);
        try {
            FileUtils.forceMkdir(tempSortDir);
        } catch (IOException ex) {
            LOG.fatal("Unable to create temporary sort directory", ex);
            throw new RuntimeException("Unable to create temporary sort directory", ex);
        }

        // The public signature takes a raw Comparator; the lines handed to it are always Strings.
        @SuppressWarnings("unchecked")
        Comparator<String> lineComparator = comparator;

        try {
            int numFiles = sortToTempFiles(inputFileName, tempSortDir, lineComparator);

            // now that the sort is complete - merge the sorted files
            mergeFiles(tempSortDir, numFiles, outputFileName, lineComparator);
        } finally {
            // remove the temporary sort directory even when sorting or merging failed,
            // so that failed runs do not accumulate chunk files
            FileUtils.deleteQuietly(tempSortDir);
        }
    }

    /** Maximum number of input lines sorted in memory and written to a single chunk file. */
    static int linesPerFile = 10000;

    /* Code below derived from code originally written by Sammy Larbi and
     * downloaded from www.codeodor.com.
     *
     * http://www.codeodor.com/index.cfm/2007/5/14/Re-Sorting-really-BIG-files---the-Java-source-code/1208
     */

    /**
     * Splits the input file into sorted chunk files named {@code chunk_0}, {@code chunk_1}, ...
     * inside {@code tempSortDir}. Each chunk holds at most {@link #linesPerFile} lines; empty
     * chunks are never written, so an empty input yields zero chunks.
     *
     * @param inputFileName path of the file to read
     * @param tempSortDir existing directory that receives the chunk files
     * @param comparator ordering used to sort the lines of each chunk
     * @return the number of chunk files written
     * @throws IllegalStateException if {@link #linesPerFile} is less than 1
     * @throws RuntimeException if the input file cannot be found or any failure occurs while
     *         reading, sorting or writing
     */
    private static int sortToTempFiles(String inputFileName, File tempSortDir, Comparator<String> comparator) {
        final int batchSize = linesPerFile;
        if (batchSize < 1) {
            // a batch size of zero would never consume input and loop forever
            throw new IllegalStateException("linesPerFile must be at least 1 but was " + batchSize);
        }

        BufferedReader inputFile;
        try {
            inputFile = new BufferedReader(new FileReader(inputFileName));
        } catch (FileNotFoundException ex) {
            LOG.fatal("Unable to find input file: " + inputFileName, ex);
            throw new RuntimeException("Unable to find input file: " + inputFileName, ex);
        }
        try {
            ArrayList<String> batchLines = new ArrayList<String>(batchSize);

            int numFiles = 0;
            boolean endOfInput = false;
            while (!endOfInput) {
                // read the next batch, stopping as soon as the input is exhausted
                for (int i = 0; i < batchSize; i++) {
                    String line = inputFile.readLine();
                    if (line == null) {
                        endOfInput = true;
                        break;
                    }
                    batchLines.add(line);
                }

                if (!batchLines.isEmpty()) {
                    // sort the rows
                    Collections.sort(batchLines, comparator);

                    // write to disk
                    writeChunk(new File(tempSortDir, "chunk_" + numFiles), batchLines);
                    numFiles++;
                    batchLines.clear(); // empty the array for the next pass
                }
            }
            inputFile.close();
            return numFiles;
        } catch (Exception ex) {
            LOG.fatal("Exception processing sort to temp files.", ex);
            throw new RuntimeException(ex);
        } finally {
            // no-op after the successful close above; releases the handle on failure
            closeQuietly(inputFile);
        }
    }

    /**
     * Writes the given lines to {@code chunkFile}, each followed by {@code '\n'}.
     *
     * @param chunkFile file to create or overwrite
     * @param sortedLines lines to write, in the order given
     * @throws IOException if the file cannot be opened, written or flushed
     */
    private static void writeChunk(File chunkFile, ArrayList<String> sortedLines) throws IOException {
        BufferedWriter bw = new BufferedWriter(new FileWriter(chunkFile));
        try {
            for (String sortedLine : sortedLines) {
                bw.append(sortedLine).append('\n');
            }
            // close explicitly so that a failing flush is reported to the caller
            bw.close();
        } finally {
            closeQuietly(bw);
        }
    }

    /**
     * Merges the sorted chunk files {@code chunk_0} .. {@code chunk_(numFiles-1)} from
     * {@code tempSortDir} into a single sorted output file. The output file is created (empty)
     * even when {@code numFiles} is zero.
     *
     * @param tempSortDir directory containing the chunk files
     * @param numFiles number of chunk files to merge
     * @param outputFileName path of the file to write the merged lines to
     * @param comparator ordering the chunk files were sorted with
     * @throws RuntimeException if any failure occurs while reading the chunks or writing the output
     */
    private static void mergeFiles(File tempSortDir, int numFiles, String outputFileName,
            Comparator<String> comparator) {
        ArrayList<BufferedReader> chunkReaders = new ArrayList<BufferedReader>(numFiles);
        BufferedWriter bw = null;
        try {
            bw = new BufferedWriter(new FileWriter(outputFileName));

            // current (smallest unwritten) line of each chunk; null once a chunk is exhausted
            ArrayList<String> fileRows = new ArrayList<String>(numFiles);

            // open every chunk and prime it with its first line
            for (int i = 0; i < numFiles; i++) {
                BufferedReader chunkReader = new BufferedReader(
                        new FileReader(new File(tempSortDir, "chunk_" + i)));
                chunkReaders.add(chunkReader);
                fileRows.add(chunkReader.readLine());
            }

            // repeatedly emit the smallest pending line and refill from the chunk it came from
            int minIndex = indexOfSmallest(fileRows, comparator);
            while (minIndex >= 0) {
                bw.append(fileRows.get(minIndex)).append('\n');
                fileRows.set(minIndex, chunkReaders.get(minIndex).readLine());
                minIndex = indexOfSmallest(fileRows, comparator);
            }

            // close explicitly so that a failing flush is reported to the caller
            bw.close();
        } catch (Exception ex) {
            LOG.error("Exception merging the sorted files", ex);
            throw new RuntimeException("Exception merging the sorted files", ex);
        } finally {
            // release every handle on both the success and the failure path
            closeQuietly(bw);
            for (BufferedReader chunkReader : chunkReaders) {
                closeQuietly(chunkReader);
            }
        }
    }

    /**
     * Finds the position of the smallest non-null entry.
     *
     * @param rows candidate lines; {@code null} entries are ignored
     * @param comparator ordering used to compare the lines
     * @return index of the smallest non-null entry (the lowest index wins on ties), or {@code -1}
     *         if every entry is {@code null} or the list is empty
     */
    private static int indexOfSmallest(ArrayList<String> rows, Comparator<String> comparator) {
        int minIndex = -1;
        String min = null;
        for (int i = 0; i < rows.size(); i++) {
            String candidate = rows.get(i);
            if (candidate != null && (min == null || comparator.compare(candidate, min) < 0)) {
                min = candidate;
                minIndex = i;
            }
        }
        return minIndex;
    }

    /**
     * Closes the resource, ignoring {@code null} and any {@link IOException}. Intended for cleanup
     * in {@code finally} blocks, where a close failure must not mask the primary exception.
     *
     * @param resource the resource to close; may be {@code null}
     */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource != null) {
            try {
                resource.close();
            } catch (IOException ignored) {
                // best-effort cleanup only
            }
        }
    }

}