List of usage examples for java.util.concurrent.ThreadPoolExecutor.isTerminated()
public boolean isTerminated()
From source file:org.trnltk.apps.tokenizer.TextTokenizerCorpusApp.java
@App("Creates tokenized files") public void tokenizeBig_files_onSource() throws IOException, InterruptedException { final StopWatch taskStopWatch = new StopWatch(); taskStopWatch.start();// w w w. ja v a2 s. c o m final File parentFolder = new File("D:\\devl\\data\\aakindan"); final File sourceFolder = new File(parentFolder, "src_split"); final File targetFolder = new File(parentFolder, "src_split_tokenized"); final File errorFolder = new File(parentFolder, "src_split_tokenization_error"); final File[] files = sourceFolder.listFiles(); Validate.notNull(files); final List<File> filesToTokenize = new ArrayList<File>(); for (File file : files) { if (file.isDirectory()) continue; filesToTokenize.add(file); } int lineCountOfAllFiles = 0; for (File file : filesToTokenize) { lineCountOfAllFiles += Utilities.lineCount(file); } System.out.println("Total lines in all files " + lineCountOfAllFiles); final StopWatch callbackStopWatch = new StopWatch(); final TokenizationCommandCallback callback = new TokenizationCommandCallback(lineCountOfAllFiles, callbackStopWatch); int NUMBER_OF_THREADS = 8; final ThreadPoolExecutor pool = (ThreadPoolExecutor) Executors.newFixedThreadPool(NUMBER_OF_THREADS); callbackStopWatch.start(); for (File sourceFile : filesToTokenize) { final String fileBaseName = sourceFile.getName().substring(0, sourceFile.getName().length() - ".txt.0000".length()); final String index = FilenameUtils.getExtension(sourceFile.getName()); final File targetFile = new File(targetFolder, fileBaseName + "_tokenized.txt." + index); final File errorFile = new File(errorFolder, fileBaseName + "_tokenization_error.txt." 
+ index); pool.execute( new TokenizationCommand(callback, fastRelaxedTokenizer, sourceFile, targetFile, errorFile)); } pool.shutdown(); while (!pool.isTerminated()) { // System.out.println("Waiting pool to be terminated!"); pool.awaitTermination(3000, TimeUnit.MILLISECONDS); } callbackStopWatch.stop(); taskStopWatch.stop(); System.out.println("Total time :" + taskStopWatch.toString()); System.out.println("Nr of tokens : " + callback.getNumberOfTokens()); System.out.println( "Avg time : " + (taskStopWatch.getTime() * 1.0d) / (callback.getNumberOfTokens() * 1.0d) + " ms"); }
From source file:org.trnltk.apps.tokenizer.TextTokenizerCorpusApp.java
@App("Creates tokenized files") public void convertTokensToLines_Big_files_onSource() throws IOException, InterruptedException { final StopWatch taskStopWatch = new StopWatch(); taskStopWatch.start();// www. j av a 2s . c o m final File parentFolder = new File("D:\\devl\\data\\aakindan"); final File sourceFolder = new File(parentFolder, "src_split_tokenized"); final File targetFolder = new File(parentFolder, "src_split_tokenized_lines"); final File[] files = sourceFolder.listFiles(); Validate.notNull(files); final List<File> filesToTokenize = new ArrayList<File>(); for (File file : files) { if (file.isDirectory()) continue; filesToTokenize.add(file); } final StopWatch callbackStopWatch = new StopWatch(); int NUMBER_OF_THREADS = 8; final ThreadPoolExecutor pool = (ThreadPoolExecutor) Executors.newFixedThreadPool(NUMBER_OF_THREADS); callbackStopWatch.start(); for (final File sourceFile : filesToTokenize) { final File targetFile = new File(targetFolder, sourceFile.getName()); pool.execute(new Runnable() { @Override public void run() { System.out.println("Processing file " + sourceFile); BufferedWriter writer = null; try { final List<String> lines = Files.readLines(sourceFile, Charsets.UTF_8); writer = Files.newWriter(targetFile, Charsets.UTF_8); for (String line : lines) { final Iterable<String> tokens = Splitter.on(' ').omitEmptyStrings().trimResults() .split(line); for (String token : tokens) { writer.write(token); writer.write("\n"); } } } catch (IOException e) { e.printStackTrace(); } finally { if (writer != null) try { writer.close(); } catch (IOException e) { e.printStackTrace(); } } } }); } pool.shutdown(); while (!pool.isTerminated()) { // System.out.println("Waiting pool to be terminated!"); pool.awaitTermination(3000, TimeUnit.MILLISECONDS); } callbackStopWatch.stop(); taskStopWatch.stop(); System.out.println("Total time :" + taskStopWatch.toString()); }
From source file:org.trnltk.apps.tokenizer.TextTokenizerCorpusApp.java
@App("Creates tokenized files") public void findUniqueChars_Big_files_onSource() throws IOException, InterruptedException { final StopWatch taskStopWatch = new StopWatch(); taskStopWatch.start();// w ww. ja va2 s . co m final File parentFolder = new File("D:\\devl\\data\\aakindan"); final File targetFile = new File(parentFolder, "chars_with_occurrence.txt"); final File sourceFolder = new File(parentFolder, "src_split_tokenized_lines"); final File[] files = sourceFolder.listFiles(); Validate.notNull(files); final List<File> filesToInvestigate = new ArrayList<File>(); for (File file : files) { if (file.isDirectory()) continue; filesToInvestigate.add(file); } final StopWatch callbackStopWatch = new StopWatch(); int NUMBER_OF_THREADS = 8; final ThreadPoolExecutor pool = (ThreadPoolExecutor) Executors.newFixedThreadPool(NUMBER_OF_THREADS); final boolean[] b = new boolean[65536 * 5]; callbackStopWatch.start(); for (final File sourceFile : filesToInvestigate) { pool.execute(new Runnable() { @Override public void run() { System.out.println("Processing file " + sourceFile); try { final List<String> lines = Files.readLines(sourceFile, Charsets.UTF_8); for (String token : lines) { for (int i = 0; i < token.length(); i++) { char aChar = token.charAt(i); b[aChar] = true; } } } catch (IOException e) { e.printStackTrace(); } } }); } pool.shutdown(); while (!pool.isTerminated()) { // System.out.println("Waiting pool to be terminated!"); pool.awaitTermination(3000, TimeUnit.MILLISECONDS); } final BufferedWriter writer = Files.newWriter(targetFile, Charsets.UTF_8); for (int i = 0; i < b.length; i++) { boolean occurs = b[i]; if (occurs) { writer.write((char) i); writer.write("\n"); } } writer.close(); callbackStopWatch.stop(); taskStopWatch.stop(); System.out.println("Total time :" + taskStopWatch.toString()); }
From source file:org.trnltk.apps.tokenizer.UniqueWordFinderApp.java
@App("Goes thru tokenized files, finds unique words") public void findWordHistogram() throws InterruptedException { final StopWatch taskStopWatch = new StopWatch(); taskStopWatch.start();/*from w w w .j av a 2 s.c o m*/ final File parentFolder = new File("D:\\devl\\data\\aakindan"); final File sourceFolder = new File(parentFolder, "src_split_tokenized"); final File[] files = sourceFolder.listFiles(); Validate.notNull(files); final List<File> filesToRead = new ArrayList<File>(); for (File file : files) { if (file.isDirectory()) continue; filesToRead.add(file); } int NUMBER_OF_THREADS = 8; final ThreadPoolExecutor pool = (ThreadPoolExecutor) Executors.newFixedThreadPool(NUMBER_OF_THREADS); Map[] countMaps = new Map[NUMBER_OF_THREADS]; for (int i = 0; i < countMaps.length; i++) { countMaps[i] = new HashMap(1000000); } for (int i = 0; i < filesToRead.size(); i++) { File file = filesToRead.get(i); //noinspection unchecked pool.execute(new HistogramCommand(countMaps[i % NUMBER_OF_THREADS], file)); } pool.shutdown(); while (!pool.isTerminated()) { //System.out.println("Waiting pool to be terminated!"); pool.awaitTermination(3000, TimeUnit.MILLISECONDS); } System.out.println("Merging countMaps"); final HashMap<String, Integer> mergeMap = new HashMap<String, Integer>( countMaps[0].size() * NUMBER_OF_THREADS); //approx for (Map<String, Integer> countMap : countMaps) { for (Map.Entry<String, Integer> stringIntegerEntry : countMap.entrySet()) { final String surface = stringIntegerEntry.getKey(); final Integer newCount = stringIntegerEntry.getValue(); final Integer existingCount = mergeMap.get(surface); if (existingCount == null) mergeMap.put(surface, newCount); else mergeMap.put(surface, existingCount + newCount); } } System.out.println("Sorting mergeMaps"); final Map<String, Integer> sortedMergeMap = new TreeMap<String, Integer>(new Comparator<String>() { @Override public int compare(String a, String b) { Integer x = mergeMap.get(a); Integer y = mergeMap.get(b); if 
(x.equals(y)) { return a.compareTo(b); } return y.compareTo(x); } }); sortedMergeMap.putAll(mergeMap); System.out.println("Writing to file"); int numberOfTokens = 0; final File outputFile = new File(parentFolder, "wordHistogram.txt"); BufferedWriter bufferedWriter = null; try { bufferedWriter = Files.newWriter(outputFile, Charsets.UTF_8); for (Map.Entry<String, Integer> entry : sortedMergeMap.entrySet()) { numberOfTokens += entry.getValue(); bufferedWriter.write(entry.getKey() + " " + entry.getValue() + "\n"); } } catch (IOException e) { e.printStackTrace(); } finally { if (bufferedWriter != null) try { bufferedWriter.close(); } catch (IOException e) { System.err.println("Unable to close file "); e.printStackTrace(); } } taskStopWatch.stop(); System.out.println("Total time :" + taskStopWatch.toString()); System.out.println("Nr of tokens : " + numberOfTokens); System.out.println("Nr of unique tokens : " + sortedMergeMap.size()); }