Example usage for java.util.concurrent.ThreadPoolExecutor.isTerminated()

Introduction

On this page you can find example usages of the isTerminated() method of java.util.concurrent.ThreadPoolExecutor.

Prototype

public boolean isTerminated()

Source Link

Usage

From source file:org.trnltk.apps.tokenizer.TextTokenizerCorpusApp.java

/**
 * Submits one {@code TokenizationCommand} per regular file found directly under
 * the "src_split" folder and waits until all of them have run.
 *
 * For every input file a target file (under "src_split_tokenized") and an error
 * file (under "src_split_tokenization_error") are derived from the input name:
 * the trailing ".txt.NNNN"-sized suffix is cut off to obtain the base name and
 * the file extension is re-appended as the chunk index. What the command does
 * with those files is not visible here.
 *
 * Prints the total line count up front and, once the pool has terminated, the
 * elapsed time, the token count reported by the callback and the average time
 * per token.
 *
 * @throws IOException          if counting the lines of an input file fails
 * @throws InterruptedException if the calling thread is interrupted while waiting for the pool
 */
@App("Creates tokenized files")
public void tokenizeBig_files_onSource() throws IOException, InterruptedException {
    final StopWatch overallWatch = new StopWatch();
    overallWatch.start();

    final File baseDir = new File("D:\\devl\\data\\aakindan");
    final File inputDir = new File(baseDir, "src_split");
    final File outputDir = new File(baseDir, "src_split_tokenized");
    final File errorDir = new File(baseDir, "src_split_tokenization_error");
    final File[] entries = inputDir.listFiles();
    Validate.notNull(entries);

    // Keep regular files only and total up their line counts along the way.
    final List<File> inputs = new ArrayList<File>();
    int totalLines = 0;
    for (File entry : entries) {
        if (!entry.isDirectory()) {
            inputs.add(entry);
            totalLines += Utilities.lineCount(entry);
        }
    }

    System.out.println("Total lines in all files " + totalLines);

    final StopWatch progressWatch = new StopWatch();
    final TokenizationCommandCallback callback = new TokenizationCommandCallback(totalLines, progressWatch);

    final int workerCount = 8;
    final ThreadPoolExecutor pool = (ThreadPoolExecutor) Executors.newFixedThreadPool(workerCount);

    progressWatch.start();
    // Input names are expected to end in a suffix shaped like ".txt.0000".
    final int suffixLength = ".txt.0000".length();
    for (File input : inputs) {
        final String name = input.getName();
        final String stem = name.substring(0, name.length() - suffixLength);
        final String chunkIndex = FilenameUtils.getExtension(name);
        final File tokenized = new File(outputDir, stem + "_tokenized.txt." + chunkIndex);
        final File errors = new File(errorDir, stem + "_tokenization_error.txt." + chunkIndex);

        pool.execute(new TokenizationCommand(callback, fastRelaxedTokenizer, input, tokenized, errors));
    }

    // No new tasks are accepted from here on; poll until the queued ones are done.
    pool.shutdown();
    while (!pool.isTerminated()) {
        pool.awaitTermination(3000, TimeUnit.MILLISECONDS);
    }

    progressWatch.stop();
    overallWatch.stop();
    System.out.println("Total time :" + overallWatch.toString());
    System.out.println("Nr of tokens : " + callback.getNumberOfTokens());
    final double elapsedMillis = overallWatch.getTime() * 1.0d;
    final double tokenCount = callback.getNumberOfTokens() * 1.0d;
    System.out.println("Avg time : " + elapsedMillis / tokenCount + " ms");
}

From source file:org.trnltk.apps.tokenizer.TextTokenizerCorpusApp.java

/**
 * Rewrites every regular file found directly under "src_split_tokenized" into a
 * same-named file under "src_split_tokenized_lines" that holds one token per line.
 *
 * Each file is handled by its own pool task: all lines are read as UTF-8, every
 * line is split on single spaces (empty pieces dropped, pieces trimmed) and each
 * resulting token is written followed by "\n". I/O failures inside a task are
 * printed and do not stop the other tasks.
 *
 * @throws IOException          declared by the signature; the visible body reports task I/O errors itself
 * @throws InterruptedException if the calling thread is interrupted while waiting for the pool
 */
@App("Creates tokenized files")
public void convertTokensToLines_Big_files_onSource() throws IOException, InterruptedException {
    final StopWatch overallWatch = new StopWatch();
    overallWatch.start();

    final File baseDir = new File("D:\\devl\\data\\aakindan");
    final File inputDir = new File(baseDir, "src_split_tokenized");
    final File outputDir = new File(baseDir, "src_split_tokenized_lines");
    final File[] entries = inputDir.listFiles();
    Validate.notNull(entries);

    // Sub-directories are ignored; only plain files are converted.
    final List<File> inputs = new ArrayList<File>();
    for (File entry : entries) {
        if (!entry.isDirectory()) {
            inputs.add(entry);
        }
    }

    final StopWatch poolWatch = new StopWatch();

    final int workerCount = 8;
    final ThreadPoolExecutor pool = (ThreadPoolExecutor) Executors.newFixedThreadPool(workerCount);

    poolWatch.start();
    for (final File input : inputs) {
        final File output = new File(outputDir, input.getName());
        final Runnable job = new Runnable() {
            @Override
            public void run() {
                System.out.println("Processing file " + input);
                BufferedWriter out = null;
                try {
                    // The whole input is read before the output file is opened.
                    final List<String> lines = Files.readLines(input, Charsets.UTF_8);
                    out = Files.newWriter(output, Charsets.UTF_8);
                    final Splitter bySpace = Splitter.on(' ').omitEmptyStrings().trimResults();
                    for (String line : lines) {
                        for (String token : bySpace.split(line)) {
                            out.write(token);
                            out.write("\n");
                        }
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                } finally {
                    if (out != null) {
                        try {
                            out.close();
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                    }
                }
            }
        };
        pool.execute(job);
    }

    // No new tasks are accepted from here on; poll until the queued ones are done.
    pool.shutdown();
    while (!pool.isTerminated()) {
        pool.awaitTermination(3000, TimeUnit.MILLISECONDS);
    }

    poolWatch.stop();
    overallWatch.stop();
    System.out.println("Total time :" + overallWatch.toString());
}

From source file:org.trnltk.apps.tokenizer.TextTokenizerCorpusApp.java

/**
 * Collects every distinct {@code char} that occurs in the regular files directly
 * under "src_split_tokenized_lines" and writes them, one per line and in
 * ascending code-unit order, to "chars_with_occurrence.txt".
 *
 * Each file is scanned by its own pool task, which reads all lines as UTF-8 and
 * flags every char it sees in a shared boolean array. A file that cannot be read
 * is reported via its stack trace and skipped, so the result may then be
 * incomplete. The output file is written only after the pool has terminated.
 *
 * @throws IOException          if the output file cannot be created, written or closed
 * @throws InterruptedException if the calling thread is interrupted while waiting for the pool
 */
@App("Creates tokenized files")
public void findUniqueChars_Big_files_onSource() throws IOException, InterruptedException {
    final StopWatch taskStopWatch = new StopWatch();
    taskStopWatch.start();

    final File parentFolder = new File("D:\\devl\\data\\aakindan");
    final File targetFile = new File(parentFolder, "chars_with_occurrence.txt");
    final File sourceFolder = new File(parentFolder, "src_split_tokenized_lines");
    final File[] files = sourceFolder.listFiles();
    Validate.notNull(files);

    final List<File> filesToInvestigate = new ArrayList<File>();
    for (File file : files) {
        if (file.isDirectory())
            continue;

        filesToInvestigate.add(file);
    }

    int NUMBER_OF_THREADS = 8;
    final ThreadPoolExecutor pool = (ThreadPoolExecutor) Executors.newFixedThreadPool(NUMBER_OF_THREADS);
    // One flag per possible char value; a char index can never exceed Character.MAX_VALUE.
    // Workers only ever store 'true' here, and the array is read only after the pool has terminated.
    final boolean[] b = new boolean[Character.MAX_VALUE + 1];

    for (final File sourceFile : filesToInvestigate) {
        pool.execute(new Runnable() {
            @Override
            public void run() {
                System.out.println("Processing file " + sourceFile);
                try {
                    final List<String> lines = Files.readLines(sourceFile, Charsets.UTF_8);
                    for (String token : lines) {
                        for (int i = 0; i < token.length(); i++) {
                            char aChar = token.charAt(i);
                            b[aChar] = true;
                        }
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        });
    }

    // No new tasks are accepted from here on; poll until the queued ones are done.
    pool.shutdown();
    while (!pool.isTerminated()) {
        pool.awaitTermination(3000, TimeUnit.MILLISECONDS);
    }

    final BufferedWriter writer = Files.newWriter(targetFile, Charsets.UTF_8);
    try {
        for (int i = 0; i < b.length; i++) {
            boolean occurs = b[i];
            if (occurs) {
                writer.write((char) i);
                writer.write("\n");
            }
        }
    } finally {
        // Close even when a write fails, so the file handle is never leaked.
        writer.close();
    }

    taskStopWatch.stop();
    System.out.println("Total time :" + taskStopWatch.toString());
}

From source file:org.trnltk.apps.tokenizer.UniqueWordFinderApp.java

/**
 * Builds a word histogram over the regular files directly under
 * "src_split_tokenized" and writes it to "wordHistogram.txt", one
 * "word count" pair per line, ordered by descending count and then by word.
 *
 * Files are distributed round-robin over a fixed number of count maps, and one
 * {@code HistogramCommand} per file is handed its map. Every map is served by
 * its own single-thread pool, so at most one command at a time ever runs
 * against a given {@link HashMap}. (With one shared multi-thread pool, two
 * commands assigned to the same map could run at the same moment on different
 * threads.) NOTE(review): this presumes HistogramCommand writes its counts into
 * the map it is given; that class is not visible here.
 *
 * After all pools have terminated the maps are merged, sorted and written out.
 * A failure while writing is reported on the console and does not abort the
 * final statistics output.
 *
 * @throws InterruptedException if the calling thread is interrupted while waiting for the pools
 */
@App("Goes thru tokenized files, finds unique words")
public void findWordHistogram() throws InterruptedException {
    final StopWatch taskStopWatch = new StopWatch();
    taskStopWatch.start();

    final File parentFolder = new File("D:\\devl\\data\\aakindan");
    final File sourceFolder = new File(parentFolder, "src_split_tokenized");
    final File[] files = sourceFolder.listFiles();
    Validate.notNull(files);

    final List<File> filesToRead = new ArrayList<File>();
    for (File file : files) {
        if (file.isDirectory())
            continue;

        filesToRead.add(file);
    }

    final int NUMBER_OF_THREADS = 8;
    // One single-thread pool per count map: the pool's queue serializes all
    // commands that share that map.
    final List<ThreadPoolExecutor> pools = new ArrayList<ThreadPoolExecutor>(NUMBER_OF_THREADS);
    final List<Map<String, Integer>> countMaps = new ArrayList<Map<String, Integer>>(NUMBER_OF_THREADS);
    for (int i = 0; i < NUMBER_OF_THREADS; i++) {
        pools.add((ThreadPoolExecutor) Executors.newFixedThreadPool(1));
        countMaps.add(new HashMap<String, Integer>(1000000));
    }

    for (int i = 0; i < filesToRead.size(); i++) {
        final int slot = i % NUMBER_OF_THREADS;
        pools.get(slot).execute(new HistogramCommand(countMaps.get(slot), filesToRead.get(i)));
    }

    // Stop accepting work on every pool first so they all drain in parallel,
    // then wait for each of them in turn.
    for (ThreadPoolExecutor pool : pools) {
        pool.shutdown();
    }
    for (ThreadPoolExecutor pool : pools) {
        while (!pool.isTerminated()) {
            pool.awaitTermination(3000, TimeUnit.MILLISECONDS);
        }
    }

    System.out.println("Merging countMaps");
    final HashMap<String, Integer> mergeMap = new HashMap<String, Integer>(
            countMaps.get(0).size() * NUMBER_OF_THREADS); //approx
    for (Map<String, Integer> countMap : countMaps) {
        for (Map.Entry<String, Integer> stringIntegerEntry : countMap.entrySet()) {
            final String surface = stringIntegerEntry.getKey();
            final Integer newCount = stringIntegerEntry.getValue();
            final Integer existingCount = mergeMap.get(surface);
            if (existingCount == null)
                mergeMap.put(surface, newCount);
            else
                mergeMap.put(surface, existingCount + newCount);
        }
    }

    System.out.println("Sorting mergeMaps");
    // Order keys by descending count; equal counts fall back to natural string
    // order so that distinct words never compare as equal.
    final Map<String, Integer> sortedMergeMap = new TreeMap<String, Integer>(new Comparator<String>() {
        @Override
        public int compare(String a, String b) {
            Integer x = mergeMap.get(a);
            Integer y = mergeMap.get(b);
            if (x.equals(y)) {
                return a.compareTo(b);
            }
            return y.compareTo(x);
        }
    });

    sortedMergeMap.putAll(mergeMap);

    System.out.println("Writing to file");
    // long: the sum of all counts over a whole corpus can exceed Integer.MAX_VALUE.
    long numberOfTokens = 0;
    final File outputFile = new File(parentFolder, "wordHistogram.txt");
    BufferedWriter bufferedWriter = null;
    try {
        bufferedWriter = Files.newWriter(outputFile, Charsets.UTF_8);
        for (Map.Entry<String, Integer> entry : sortedMergeMap.entrySet()) {
            numberOfTokens += entry.getValue();
            bufferedWriter.write(entry.getKey() + " " + entry.getValue() + "\n");
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (bufferedWriter != null)
            try {
                bufferedWriter.close();
            } catch (IOException e) {
                System.err.println("Unable to close file ");
                e.printStackTrace();
            }
    }

    taskStopWatch.stop();

    System.out.println("Total time :" + taskStopWatch.toString());
    System.out.println("Nr of tokens : " + numberOfTokens);
    System.out.println("Nr of unique tokens : " + sortedMergeMap.size());
}