List of usage examples for java.util.concurrent.ThreadPoolExecutor.submit
public Future<?> submit(Runnable task)
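The Future returned by submit(Runnable) yields null from get() once the task completes; an exception thrown inside the task surfaces as an ExecutionException from get(). Before the project examples below, here is a minimal, self-contained sketch of that behavior (the class name, pool sizes, and printed message are illustrative, not taken from any of the examples):

import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

public class SubmitRunnableExample {
    public static void main(String[] args) throws Exception {
        // Fixed-size pool (2 core/max threads) backed by an unbounded work queue.
        ThreadPoolExecutor pool = new ThreadPoolExecutor(2, 2, 0L, TimeUnit.MILLISECONDS,
                new LinkedBlockingQueue<Runnable>());
        // submit(Runnable) wraps the task in a FutureTask; get() returns null on success.
        Future<?> result = pool.submit(
                () -> System.out.println("task ran on " + Thread.currentThread().getName()));
        result.get(); // blocks until the task finishes; task failures surface as ExecutionException
        pool.shutdown();
        pool.awaitTermination(10, TimeUnit.SECONDS);
    }
}

The examples that follow show the same call in larger contexts: bounded and unbounded queues, custom ThreadFactory and RejectedExecutionHandler implementations, and collecting the returned Futures to await completion or propagate errors.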
From source file:org.deeplearning4j.models.word2vec.Word2Vec.java
/**
 * Train the model
 */
public void fit() throws IOException {
    boolean loaded = buildVocab();
    // save vocab after building
    if (!loaded && saveVocab)
        vocab().saveVocab();
    if (stopWords == null)
        readStopWords();

    log.info("Training word2vec multithreaded");

    if (sentenceIter != null)
        sentenceIter.reset();
    if (docIter != null)
        docIter.reset();

    int[] docs = vectorizer.index().allDocs();
    if (docs.length < 1) {
        vectorizer.fit();
    }
    docs = vectorizer.index().allDocs();
    if (docs.length < 1) {
        throw new IllegalStateException("No documents found");
    }

    totalWords = vectorizer.numWordsEncountered();
    if (totalWords < 1)
        throw new IllegalStateException("Unable to train, total words less than 1");

    totalWords *= numIterations;

    log.info("Processing sentences...");

    AtomicLong numWordsSoFar = new AtomicLong(0);
    final AtomicLong nextRandom = new AtomicLong(5);

    ExecutorService exec = new ThreadPoolExecutor(Runtime.getRuntime().availableProcessors(),
            Runtime.getRuntime().availableProcessors(), 0L, TimeUnit.MILLISECONDS,
            new LinkedBlockingQueue<Runnable>(), new RejectedExecutionHandler() {
                @Override
                public void rejectedExecution(Runnable r, ThreadPoolExecutor executor) {
                    try {
                        Thread.sleep(1000);
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                    }
                    executor.submit(r);
                }
            });

    final Queue<List<VocabWord>> batch2 = new ConcurrentLinkedDeque<>();
    vectorizer.index().eachDoc(new Function<List<VocabWord>, Void>() {
        @Override
        public Void apply(List<VocabWord> input) {
            List<VocabWord> batch = new ArrayList<>();
            addWords(input, nextRandom, batch);
            if (!batch.isEmpty()) {
                batch2.add(batch);
            }
            return null;
        }
    }, exec);

    exec.shutdown();
    try {
        exec.awaitTermination(1, TimeUnit.DAYS);
    } catch (InterruptedException e) {
        e.printStackTrace();
    }

    ActorSystem actorSystem = ActorSystem.create();
    for (int i = 0; i < numIterations; i++)
        doIteration(batch2, numWordsSoFar, nextRandom, actorSystem);
    actorSystem.shutdown();
}
From source file:org.apache.accumulo.core.file.rfile.MultiThreadedRFileTest.java
@SuppressFBWarnings(value = "INFORMATION_EXPOSURE_THROUGH_AN_ERROR_MESSAGE",
        justification = "information put into error message is safe and used for testing")
@Test
public void testMultipleReaders() throws IOException {
    final List<Throwable> threadExceptions = Collections.synchronizedList(new ArrayList<Throwable>());
    Map<String, MutableInt> messages = new HashMap<>();
    Map<String, String> stackTrace = new HashMap<>();

    final TestRFile trfBase = new TestRFile(conf);
    writeData(trfBase);
    trfBase.openReader();
    try {
        validate(trfBase);
        final TestRFile trfBaseCopy = trfBase.deepCopy();
        validate(trfBaseCopy);

        // now start up multiple RFile deepcopies
        int maxThreads = 10;
        String name = "MultiThreadedRFileTestThread";
        ThreadPoolExecutor pool = new ThreadPoolExecutor(maxThreads + 1, maxThreads + 1, 5 * 60,
                TimeUnit.SECONDS, new LinkedBlockingQueue<>(), new NamingThreadFactory(name));
        pool.allowCoreThreadTimeOut(true);
        try {
            Runnable runnable = () -> {
                try {
                    TestRFile trf = trfBase;
                    synchronized (trfBaseCopy) {
                        trf = trfBaseCopy.deepCopy();
                    }
                    validate(trf);
                } catch (Throwable t) {
                    threadExceptions.add(t);
                }
            };
            for (int i = 0; i < maxThreads; i++) {
                pool.submit(runnable);
            }
        } finally {
            pool.shutdown();
            try {
                pool.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }

        for (Throwable t : threadExceptions) {
            String msg = t.getClass() + " : " + t.getMessage();
            if (!messages.containsKey(msg)) {
                messages.put(msg, new MutableInt(1));
            } else {
                messages.get(msg).increment();
            }
            StringWriter string = new StringWriter();
            PrintWriter writer = new PrintWriter(string);
            t.printStackTrace(writer);
            writer.flush();
            stackTrace.put(msg, string.getBuffer().toString());
        }
    } finally {
        trfBase.closeReader();
        trfBase.close();
    }

    for (String message : messages.keySet()) {
        LOG.error(messages.get(message) + ": " + message);
        LOG.error(stackTrace.get(message));
    }
    assertTrue(threadExceptions.isEmpty());
}
From source file:com.splicemachine.derby.stream.control.ControlDataSet.java
@Override
public DataSet<V> union(DataSet<V> dataSet) {
    ThreadPoolExecutor tpe = null;
    try {
        ThreadFactory factory = new ThreadFactoryBuilder().setNameFormat("union-begin-query-%d")
                .setUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {
                    @Override
                    public void uncaughtException(Thread t, Throwable e) {
                        e.printStackTrace();
                    }
                }).build();
        tpe = new ThreadPoolExecutor(2, 2, 60, TimeUnit.SECONDS, new SynchronousQueue<Runnable>(), factory,
                new ThreadPoolExecutor.CallerRunsPolicy());
        tpe.allowCoreThreadTimeOut(false);
        tpe.prestartAllCoreThreads();
        Future<Iterator<V>> leftSideFuture = tpe.submit(new NonLazy(iterator));
        Future<Iterator<V>> rightSideFuture = tpe.submit(new NonLazy(((ControlDataSet<V>) dataSet).iterator));
        return new ControlDataSet<>(Iterators.concat(leftSideFuture.get(), rightSideFuture.get()));
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        if (tpe != null)
            tpe.shutdown();
    }
}
From source file:org.apache.hadoop.hbase.regionserver.SplitTransactionImpl.java
/**
 * Creates reference files for top and bottom half of the split.
 * @param hstoreFilesToSplit map of store files to create half file references for.
 * @return the number of reference files that were created.
 * @throws IOException
 */
private Pair<Integer, Integer> splitStoreFiles(final Map<byte[], List<StoreFile>> hstoreFilesToSplit)
        throws IOException {
    if (hstoreFilesToSplit == null) {
        // Could be null because close didn't succeed -- for now consider it fatal
        throw new IOException("Close returned empty list of StoreFiles");
    }
    // The following code sets up a thread pool executor with as many slots as
    // there's files to split. It then fires up everything, waits for
    // completion and finally checks for any exception
    int nbFiles = 0;
    for (Map.Entry<byte[], List<StoreFile>> entry : hstoreFilesToSplit.entrySet()) {
        nbFiles += entry.getValue().size();
    }
    if (nbFiles == 0) {
        // no file needs to be split.
        return new Pair<Integer, Integer>(0, 0);
    }
    // Default max #threads to use is the smaller of table's configured number of blocking store
    // files or the available number of logical cores.
    int defMaxThreads = Math.min(
            parent.conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT),
            Runtime.getRuntime().availableProcessors());
    // Max #threads is the smaller of the number of storefiles or the default max determined above.
    int maxThreads = Math.min(parent.conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX, defMaxThreads), nbFiles);
    LOG.info("Preparing to split " + nbFiles + " storefiles for region " + this.parent + " using " + maxThreads
            + " threads");
    ThreadFactoryBuilder builder = new ThreadFactoryBuilder();
    builder.setNameFormat("StoreFileSplitter-%1$d");
    ThreadFactory factory = builder.build();
    ThreadPoolExecutor threadPool = (ThreadPoolExecutor) Executors.newFixedThreadPool(maxThreads, factory);
    List<Future<Pair<Path, Path>>> futures = new ArrayList<Future<Pair<Path, Path>>>(nbFiles);

    // Split each store file.
    for (Map.Entry<byte[], List<StoreFile>> entry : hstoreFilesToSplit.entrySet()) {
        for (StoreFile sf : entry.getValue()) {
            StoreFileSplitter sfs = new StoreFileSplitter(entry.getKey(), sf);
            futures.add(threadPool.submit(sfs));
        }
    }
    // Shutdown the pool
    threadPool.shutdown();

    // Wait for all the tasks to finish
    try {
        boolean stillRunning = !threadPool.awaitTermination(this.fileSplitTimeout, TimeUnit.MILLISECONDS);
        if (stillRunning) {
            threadPool.shutdownNow();
            // wait for the thread to shutdown completely.
            while (!threadPool.isTerminated()) {
                Thread.sleep(50);
            }
            throw new IOException(
                    "Took too long to split the" + " files and create the references, aborting split");
        }
    } catch (InterruptedException e) {
        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
    }

    int created_a = 0;
    int created_b = 0;
    // Look for any exception
    for (Future<Pair<Path, Path>> future : futures) {
        try {
            Pair<Path, Path> p = future.get();
            created_a += p.getFirst() != null ? 1 : 0;
            created_b += p.getSecond() != null ? 1 : 0;
        } catch (InterruptedException e) {
            throw (InterruptedIOException) new InterruptedIOException().initCause(e);
        } catch (ExecutionException e) {
            throw new IOException(e);
        }
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("Split storefiles for region " + this.parent + " Daughter A: " + created_a
                + " storefiles, Daughter B: " + created_b + " storefiles.");
    }
    return new Pair<Integer, Integer>(created_a, created_b);
}
From source file:org.apache.hadoop.hbase.regionserver.IndexSplitTransaction.java
private void splitStoreFiles(final Map<byte[], List<StoreFile>> hstoreFilesToSplit) throws IOException {
    if (hstoreFilesToSplit == null) {
        // Could be null because close didn't succeed -- for now consider it fatal
        throw new IOException("Close returned empty list of StoreFiles");
    }
    // The following code sets up a thread pool executor with as many slots as
    // there's files to split. It then fires up everything, waits for
    // completion and finally checks for any exception
    int nbFiles = hstoreFilesToSplit.size();
    if (nbFiles == 0) {
        // no file needs to be split.
        return;
    }
    ThreadFactoryBuilder builder = new ThreadFactoryBuilder();
    builder.setNameFormat("StoreFileSplitter-%1$d");
    ThreadFactory factory = builder.build();
    ThreadPoolExecutor threadPool = (ThreadPoolExecutor) Executors.newFixedThreadPool(nbFiles, factory);
    List<Future<Void>> futures = new ArrayList<Future<Void>>(nbFiles);

    // Split each store file.
    for (Map.Entry<byte[], List<StoreFile>> entry : hstoreFilesToSplit.entrySet()) {
        for (StoreFile sf : entry.getValue()) {
            StoreFileSplitter sfs = new StoreFileSplitter(entry.getKey(), sf);
            futures.add(threadPool.submit(sfs));
        }
    }
    // Shutdown the pool
    threadPool.shutdown();

    // Wait for all the tasks to finish
    try {
        boolean stillRunning = !threadPool.awaitTermination(this.fileSplitTimeout, TimeUnit.MILLISECONDS);
        if (stillRunning) {
            threadPool.shutdownNow();
            // wait for the thread to shutdown completely.
            while (!threadPool.isTerminated()) {
                Thread.sleep(50);
            }
            throw new IOException(
                    "Took too long to split the" + " files and create the references, aborting split");
        }
    } catch (InterruptedException e) {
        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
    }

    // Look for any exception
    for (Future<Void> future : futures) {
        try {
            future.get();
        } catch (InterruptedException e) {
            throw (InterruptedIOException) new InterruptedIOException().initCause(e);
        } catch (ExecutionException e) {
            throw new IOException(e);
        }
    }
}
From source file:org.apache.hadoop.hbase.client.TestHCM.java
/**
 * Tests that a destroyed connection does not have a live zookeeper.
 * Below is timing based. We put up a connection to a table and then close the connection while
 * having a background thread running that is forcing close of the connection to try and
 * provoke a close catastrophe; we are hoping for a car crash so we can see if we are leaking
 * zk connections.
 * @throws Exception
 */
@Ignore("Flakey test: See HBASE-8996")
@Test
public void testDeleteForZKConnLeak() throws Exception {
    TEST_UTIL.createTable(TABLE_NAME4, FAM_NAM);
    final Configuration config = HBaseConfiguration.create(TEST_UTIL.getConfiguration());
    config.setInt("zookeeper.recovery.retry", 1);
    config.setInt("zookeeper.recovery.retry.intervalmill", 1000);
    config.setInt("hbase.rpc.timeout", 2000);
    config.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);

    ThreadPoolExecutor pool = new ThreadPoolExecutor(1, 10, 5, TimeUnit.SECONDS,
            new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("test-hcm-delete"));

    pool.submit(new Runnable() {
        @Override
        public void run() {
            while (!Thread.interrupted()) {
                try {
                    HConnection conn = HConnectionManager.getConnection(config);
                    LOG.info("Connection " + conn);
                    HConnectionManager.deleteStaleConnection(conn);
                    LOG.info("Connection closed " + conn);
                    // TODO: This sleep time should be less than the time that it takes to open and close
                    // a table. Ideally we would do a few runs first to measure. For now this is
                    // timing based; hopefully we hit the bad condition.
                    Threads.sleep(10);
                } catch (Exception e) {
                }
            }
        }
    });

    // Use connection multiple times.
    for (int i = 0; i < 30; i++) {
        HConnection c1 = null;
        try {
            c1 = ConnectionManager.getConnectionInternal(config);
            LOG.info("HTable connection " + i + " " + c1);
            HTable table = new HTable(config, TABLE_NAME4, pool);
            table.close();
            LOG.info("HTable connection " + i + " closed " + c1);
        } catch (Exception e) {
            LOG.info("We actually want this to happen!!!! So we can see if we are leaking zk", e);
        } finally {
            if (c1 != null) {
                if (c1.isClosed()) {
                    // cannot use getZooKeeper as method instantiates watcher if null
                    Field zkwField = c1.getClass().getDeclaredField("keepAliveZookeeper");
                    zkwField.setAccessible(true);
                    Object watcher = zkwField.get(c1);

                    if (watcher != null) {
                        if (((ZooKeeperWatcher) watcher).getRecoverableZooKeeper().getState().isAlive()) {
                            // non-synchronized access to watcher; sleep and check again in case zk connection
                            // hasn't been cleaned up yet.
                            Thread.sleep(1000);
                            if (((ZooKeeperWatcher) watcher).getRecoverableZooKeeper().getState().isAlive()) {
                                pool.shutdownNow();
                                fail("Live zookeeper in closed connection");
                            }
                        }
                    }
                }
                c1.close();
            }
        }
    }
    pool.shutdownNow();
}
From source file:core.Task.java
private void runNodeTask() {
    /* Single node id */
    String runOnNode = this.coordinates.get("runOnNode");

    /*
     * Get custom user variables
     */
    ApiRequest variablesRequest = new ApiRequest(this.coordinates).setRequestMethod(ApiRequestMethods.GET)
            .setApiMethod("v1/core/get-variables");
    ApiResponse variablesResponse = ApiCaller.request(variablesRequest);

    if (!variablesResponse.success) {
        /*
         * Log record
         * Can't get variables
         */
        this.logSystemBadResponse("ERROR", "TASK GET CUSTOM VARIABLES", "Can't get task variables from API.",
                variablesResponse);
        return;
    }

    String variablesJson = variablesResponse.response;
    Type variablesType = new TypeToken<HashMap<String, String>>() {
    }.getType();
    Map<String, String> customVariables;

    try {
        customVariables = gson.fromJson(variablesJson, variablesType);
        /*
         * Setting hashMap of custom user variables
         */
        for (Map.Entry<String, String> curVar : customVariables.entrySet()) {
            DTOVariableConvertResult curVariableObject = new DTOVariableConvertResult();
            curVariableObject.setAction("process");
            curVariableObject.setStatus("success");
            curVariableObject.setVariableName(curVar.getKey());
            curVariableObject.setVariableValue(curVar.getValue());
            curVariableObject.setResult(curVar.getValue());
            this.variables.put(curVar.getKey(), curVariableObject);
        }
    } catch (Exception e) {
        this.logSystemException("ERROR", "TASK GET CUSTOM VARIABLES", "Can't parse variables list from json.",
                e);
        return;
    }

    /*
     * Add Date variable to variables hashMap
     */
    try {
        DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
        String date = dateFormat.format(new Date());

        DTOVariableConvertResult dateVariableObject = new DTOVariableConvertResult();
        dateVariableObject.setAction("process");
        dateVariableObject.setStatus("success");
        dateVariableObject.setVariableName("%%DATE%%");
        dateVariableObject.setVariableValue(date);
        dateVariableObject.setResult(date);
        this.variables.put("%%DATE%%", dateVariableObject);
    } catch (Exception e) {
        this.logSystemException("ERROR", "TASK GET CUSTOM VARIABLES", "Can't set date variable.", e);
        return;
    }

    /*
     * Run node task on nodes scope
     */
    if (runOnNode == null) {
        /*
         * Get nodes with workers by task
         */
        Map<String, String> params = new HashMap<>();
        params.put("schedule_id", this.coordinates.get("scheduleId"));
        params.put("task_name", this.coordinates.get("taskName"));

        ApiRequest request = new ApiRequest(this.coordinates).setRequestMethod(ApiRequestMethods.GET)
                .setApiMethod("v1/core/get-nodes-workers-by-task").setParams(params);
        ApiResponse nodesResponse = ApiCaller.request(request);

        if (!nodesResponse.success) {
            /*
             * Log record
             * Can't get node list
             */
            this.logBadResponse("ERROR", "TASK GET NODES",
                    "Task " + this.coordinates.get("taskName") + " can't get node list from API.",
                    nodesResponse);
            return;
        }

        String nodesJson = nodesResponse.response;
        Type nodesType = new TypeToken<HashMap<String, HashMap<String, String>>>() {
        }.getType();

        try {
            this.nodes = gson.fromJson(nodesJson, nodesType);
        } catch (JsonSyntaxException e) {
            this.logException("ERROR", "TASK GET NODES",
                    "Task " + this.coordinates.get("taskName") + " can't parse nodes list from json.", e);
            return;
        }
    } else {
        /* Run node task on single node (on demand) */

        /*
         * Get worker by node id
         */
        Map<String, String> params = new HashMap<>();
        params.put("node_id", runOnNode);
        params.put("task_name", this.coordinates.get("taskName"));

        ApiRequest nodeRequest = new ApiRequest(this.coordinates).setRequestMethod(ApiRequestMethods.GET)
                .setApiMethod("v1/core/get-worker-by-node-id").setParams(params);
        ApiResponse nodeResponse = ApiCaller.request(nodeRequest);

        if (!nodeResponse.success) {
            /*
             * Log record
             * Can't get node list
             */
            this.logBadResponse("ERROR", "TASK GET NODES",
                    "Task " + this.coordinates.get("taskName") + " can't get node list from API.",
                    nodeResponse);
            return;
        }

        String nodeJson = nodeResponse.response;
        Type nodeType = new TypeToken<HashMap<String, HashMap<String, String>>>() {
        }.getType();

        try {
            this.nodes = gson.fromJson(nodeJson, nodeType);
        } catch (JsonSyntaxException e) {
            this.logException("ERROR", "TASK GET NODES",
                    "Task " + this.coordinates.get("taskName") + " can't parse nodes list from json.", e);
            return;
        }
    }

    /*
     * Thread executor init
     */
    ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(threadCount); // number of threads

    /*
     * Futures for workers results return
     */
    List<Future<Boolean>> results = new ArrayList<>();

    // Add Workers to Executor
    // noinspection Java8MapForEach
    this.nodes.entrySet().forEach(node -> {
        Map<String, String> currentCoord = new HashMap<>();
        currentCoord.putAll(this.coordinates);
        currentCoord.put("nodeId", node.getKey());
        currentCoord.put("workerId", node.getValue().get("id"));
        currentCoord.put("nodeIp", node.getValue().get("ip"));
        currentCoord.put("nodeVendor", node.getValue().get("vendor"));
        currentCoord.put("nodeModel", node.getValue().get("model"));

        switch (node.getValue().get("get")) {
        case "snmp":
            results.add(executor.submit(new WorkerSnmp(currentCoord, this.settings, this.variables)));
            break;
        case "telnet":
            results.add(executor.submit(new WorkerTelnet(currentCoord, this.settings, this.variables)));
            break;
        case "ssh":
            results.add(executor.submit(new WorkerSsh(currentCoord, this.settings, this.variables)));
            break;
        default:
            String unknownProtocol = "Task " + this.coordinates.get("taskName") + " has unknown protocol "
                    + node.getValue().get("get") + ". Node id: " + node.getKey();
            this.logMessage("ERROR", "WORKER SPAWN", unknownProtocol);
        }
    });

    for (Future<Boolean> result : results) {
        Boolean currentResult;
        try {
            currentResult = result.get();
            if (currentResult) {
                this.success++;
            } else {
                this.failed++;
            }
        } catch (Exception e) {
            this.logException("ERROR", "TASK GET WORKER RESPONSE", "Task " + this.coordinates.get("taskName")
                    + " was interrupted while waiting for worker result.", e);
            return;
        }
    }

    executor.shutdown();

    /*
     * Task finish log
     */
    String finalMessage = "Task " + this.coordinates.get("taskName") + " has been finished. " + "Nodes: "
            + this.nodes.size() + ". Success: " + this.success + ". Failed: " + this.failed + ".";
    this.logMessage("INFO", "TASK FINISH", finalMessage);
}
From source file:org.apache.hadoop.hbase.master.procedure.SplitTableRegionProcedure.java
/**
 * Create Split directory
 * @param env MasterProcedureEnv
 * @throws IOException
 */
private Pair<Integer, Integer> splitStoreFiles(final MasterProcedureEnv env, final HRegionFileSystem regionFs)
        throws IOException {
    final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
    final Configuration conf = env.getMasterConfiguration();
    // The following code sets up a thread pool executor with as many slots as
    // there's files to split. It then fires up everything, waits for
    // completion and finally checks for any exception
    //
    // Note: splitStoreFiles creates daughter region dirs under the parent splits dir
    // Nothing to unroll here if failure -- re-run createSplitsDir will
    // clean this up.
    int nbFiles = 0;
    for (String family : regionFs.getFamilies()) {
        Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(family);
        if (storeFiles != null) {
            nbFiles += storeFiles.size();
        }
    }
    if (nbFiles == 0) {
        // no file needs to be split.
        return new Pair<Integer, Integer>(0, 0);
    }
    // Default max #threads to use is the smaller of table's configured number of blocking store
    // files or the available number of logical cores.
    int defMaxThreads = Math.min(
            conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT),
            Runtime.getRuntime().availableProcessors());
    // Max #threads is the smaller of the number of storefiles or the default max determined above.
    int maxThreads = Math.min(conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX, defMaxThreads), nbFiles);
    LOG.info("Preparing to split " + nbFiles + " storefiles for region " + parentHRI + " using " + maxThreads
            + " threads");
    ThreadPoolExecutor threadPool = (ThreadPoolExecutor) Executors.newFixedThreadPool(maxThreads,
            Threads.getNamedThreadFactory("StoreFileSplitter-%1$d"));
    List<Future<Pair<Path, Path>>> futures = new ArrayList<Future<Pair<Path, Path>>>(nbFiles);

    // Split each store file.
    final HTableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
    for (String family : regionFs.getFamilies()) {
        final HColumnDescriptor hcd = htd.getFamily(family.getBytes());
        final Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(family);
        if (storeFiles != null && storeFiles.size() > 0) {
            final CacheConfig cacheConf = new CacheConfig(conf, hcd);
            for (StoreFileInfo storeFileInfo : storeFiles) {
                StoreFileSplitter sfs = new StoreFileSplitter(regionFs, family.getBytes(), new StoreFile(
                        mfs.getFileSystem(), storeFileInfo, conf, cacheConf, hcd.getBloomFilterType()));
                futures.add(threadPool.submit(sfs));
            }
        }
    }
    // Shutdown the pool
    threadPool.shutdown();

    // Wait for all the tasks to finish
    long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout", 30000);
    try {
        boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS);
        if (stillRunning) {
            threadPool.shutdownNow();
            // wait for the thread to shutdown completely.
            while (!threadPool.isTerminated()) {
                Thread.sleep(50);
            }
            throw new IOException(
                    "Took too long to split the" + " files and create the references, aborting split");
        }
    } catch (InterruptedException e) {
        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
    }

    int daughterA = 0;
    int daughterB = 0;
    // Look for any exception
    for (Future<Pair<Path, Path>> future : futures) {
        try {
            Pair<Path, Path> p = future.get();
            daughterA += p.getFirst() != null ? 1 : 0;
            daughterB += p.getSecond() != null ? 1 : 0;
        } catch (InterruptedException e) {
            throw (InterruptedIOException) new InterruptedIOException().initCause(e);
        } catch (ExecutionException e) {
            throw new IOException(e);
        }
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("Split storefiles for region " + parentHRI + " Daughter A: " + daughterA
                + " storefiles, Daughter B: " + daughterB + " storefiles.");
    }
    return new Pair<Integer, Integer>(daughterA, daughterB);
}
From source file:org.hyperic.hq.measurement.agent.server.ScheduleThread.java
private void collect(ResourceSchedule rs, List<ScheduledMeasurement> items) {
    final boolean debug = log.isDebugEnabled();
    for (int i = 0; (i < items.size()) && (!shouldDie.get()); i++) {
        ScheduledMeasurement meas = items.get(i);
        ParsedTemplate tmpl = toParsedTemplate(meas);
        if (tmpl == null) {
            log.warn("template for meas id=" + meas.getDerivedID() + " is null");
            continue;
        }

        ThreadPoolExecutor executor;
        String plugin;
        synchronized (executors) {
            try {
                GenericPlugin p = manager.getPlugin(tmpl.plugin).getProductPlugin();
                plugin = p.getName();
            } catch (PluginNotFoundException e) {
                if (debug) {
                    log.debug("Could not find plugin name from template '" + tmpl.plugin
                            + "'. Associated plugin might not be initialized yet.");
                }
                continue;
            }
            executor = executors.get(plugin);
            if (executor == null) {
                final int poolSize = getPoolSize(plugin);
                final int queueSize = getQueueSize(plugin);
                log.info("Creating executor for plugin '" + plugin + "' with a poolsize=" + poolSize
                        + " queuesize=" + queueSize);
                final ThreadFactory factory = getFactory(plugin);
                executor = new ThreadPoolExecutor(poolSize, poolSize, 60, TimeUnit.SECONDS,
                        new LinkedBlockingQueue<Runnable>(queueSize), factory,
                        new ThreadPoolExecutor.AbortPolicy());
                executors.put(plugin, executor);
            }
        }

        MetricTask metricTask = new MetricTask(rs, meas);
        statsCollector.addStat(1, SCHEDULE_THREAD_METRIC_TASKS_SUBMITTED);
        try {
            Future<?> task = executor.submit(metricTask);
            synchronized (metricCollections) {
                metricCollections.put(task, metricTask);
            }
        } catch (RejectedExecutionException e) {
            log.warn("Executor[" + plugin + "] rejected metric task " + metricTask);
            statNumMetricsFailed++;
        }
    }
}
From source file:dk.netarkivet.harvester.indexserver.CrawlLogIndexCache.java
/**
 * Combine a number of crawl.log files into one Lucene index. This index
 * is placed as gzip files under the directory returned by getCacheFile().
 *
 * @param rawfiles The map from job ID into crawl.log contents. No
 * null values are allowed in this map.
 */
protected void combine(Map<Long, File> rawfiles) {
    indexingJobCount++;
    long datasetSize = rawfiles.values().size();
    log.info("Starting combine task #" + indexingJobCount + ". This combines a dataset with " + datasetSize
            + " crawl logs (thread = " + Thread.currentThread().getName() + ")");

    File resultDir = getCacheFile(rawfiles.keySet());
    Set<File> tmpfiles = new HashSet<File>();
    String indexLocation = resultDir.getAbsolutePath() + ".luceneDir";
    ThreadPoolExecutor executor = null;
    try {
        DigestIndexer indexer = createStandardIndexer(indexLocation);
        final boolean verboseIndexing = false;
        DigestOptions indexingOptions = new DigestOptions(this.useBlacklist, verboseIndexing, this.mimeFilter);
        long count = 0;
        Set<IndexingState> outstandingJobs = new HashSet<IndexingState>();
        final int maxThreads = Settings.getInt(HarvesterSettings.INDEXSERVER_INDEXING_MAXTHREADS);
        executor = new ThreadPoolExecutor(maxThreads, maxThreads, 0L, TimeUnit.MILLISECONDS,
                new LinkedBlockingQueue<Runnable>());
        executor.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());

        for (Map.Entry<Long, File> entry : rawfiles.entrySet()) {
            Long jobId = entry.getKey();
            File crawlLog = entry.getValue();
            // Generate UUID to ensure a unique filedir for the index.
            File tmpFile = new File(FileUtils.getTempDir(), UUID.randomUUID().toString());
            tmpfiles.add(tmpFile);
            String localindexLocation = tmpFile.getAbsolutePath();
            Long cached = cdxcache.cache(jobId);
            if (cached == null) {
                log.warn("Skipping the ingest of logs for job " + entry.getKey()
                        + ". Unable to retrieve cdx-file for job.");
                continue;
            }
            File cachedCDXFile = cdxcache.getCacheFile(cached);

            // Dispatch this indexing task to a separate thread that
            // handles the sorting of the logfiles and the generation
            // of a lucene index for this crawllog and cdxfile.
            count++;
            String taskID = count + " out of " + datasetSize;
            log.debug("Making subthread for indexing job " + jobId + " - task " + taskID);
            Callable<Boolean> task = new DigestIndexerWorker(localindexLocation, jobId, crawlLog,
                    cachedCDXFile, indexingOptions, taskID);
            Future<Boolean> result = executor.submit(task);
            outstandingJobs.add(new IndexingState(jobId, localindexLocation, result));
        }

        // wait for all the outstanding subtasks to complete.
        Set<Directory> subindices = new HashSet<Directory>();

        // Deadline for the combine-task
        long combineTimeout = Settings.getLong(HarvesterSettings.INDEXSERVER_INDEXING_TIMEOUT);
        long timeOutTime = System.currentTimeMillis() + combineTimeout;

        // The indexwriter for the totalindex.
        IndexWriter totalIndex = indexer.getIndex();
        int subindicesInTotalIndex = 0;
        // Max number of segments in totalindex.
        int maxSegments = Settings.getInt(HarvesterSettings.INDEXSERVER_INDEXING_MAX_SEGMENTS);

        final int ACCUMULATED_SUBINDICES_BEFORE_MERGING = 200;

        while (outstandingJobs.size() > 0) {
            log.info("Outstanding jobs in combine task #" + indexingJobCount + " is now "
                    + outstandingJobs.size());
            Iterator<IndexingState> iterator = outstandingJobs.iterator();
            if (timeOutTime < System.currentTimeMillis()) {
                log.warn("Max indexing time exceeded for one index ("
                        + TimeUtils.readableTimeInterval(combineTimeout) + "). Indexing stops here, although"
                        + " missing subindices for " + outstandingJobs.size() + " jobs");
                break;
            }
            while (iterator.hasNext() && subindices.size() < ACCUMULATED_SUBINDICES_BEFORE_MERGING) {
                Future<Boolean> nextResult;
                IndexingState next = iterator.next();
                if (next.getResultObject().isDone()) {
                    nextResult = next.getResultObject();
                    try {
                        // check, if the indexing failed
                        if (nextResult.get()) {
                            subindices.add(new SimpleFSDirectory(new File(next.getIndex())));
                        } else {
                            log.warn("Indexing of job " + next.getJobIdentifier() + " failed.");
                        }
                    } catch (InterruptedException e) {
                        log.warn("Unable to get Result back from " + "indexing thread", e);
                    } catch (ExecutionException e) {
                        log.warn("Unable to get Result back from " + "indexing thread", e);
                    }
                    // remove the done object from the set
                    iterator.remove();
                }
            }

            if (subindices.size() >= ACCUMULATED_SUBINDICES_BEFORE_MERGING) {
                log.info("Adding " + subindices.size()
                        + " subindices to main index. Forcing index to contain max " + maxSegments
                        + " files (related to combine task # " + indexingJobCount + ")");
                totalIndex.addIndexes(subindices.toArray(new Directory[0]));
                totalIndex.forceMerge(maxSegments);
                totalIndex.commit();
                for (Directory luceneDir : subindices) {
                    luceneDir.close();
                }
                subindicesInTotalIndex += subindices.size();
                log.info("Completed adding " + subindices.size() + " subindices to main index, now containing "
                        + subindicesInTotalIndex + " subindices" + "(related to combine task # "
                        + indexingJobCount + ")");
                subindices.clear();
            } else {
                sleepAwhile();
            }
        }

        log.info("Adding the final " + subindices.size()
                + " subindices to main index. Forcing index to contain max " + maxSegments + " files "
                + "(related to combine task # " + indexingJobCount + ")");
        totalIndex.addIndexes(subindices.toArray(new Directory[0]));
        totalIndex.forceMerge(maxSegments);
        totalIndex.commit();
        for (Directory luceneDir : subindices) {
            luceneDir.close();
        }
        subindices.clear();

        log.info("Adding operation completed (combine task # " + indexingJobCount + ")!");
        long docsInIndex = totalIndex.numDocs();

        indexer.close();
        log.info("Closed index (related to combine task # " + indexingJobCount);

        // Now the index is made, gzip it up.
        File totalIndexDir = new File(indexLocation);
        log.info("Gzip-compressing the individual " + totalIndexDir.list().length
                + " index files of combine task # " + indexingJobCount);
        ZipUtils.gzipFiles(totalIndexDir, resultDir);
        log.info("Completed combine task # " + indexingJobCount + " that combined a dataset with " + datasetSize
                + " crawl logs (entries in combined index: " + docsInIndex + ") - compressed index has size "
                + FileUtils.getHumanReadableFileSize(resultDir));
    } catch (IOException e) {
        throw new IOFailure("Error setting up craw.log index framework for " + resultDir.getAbsolutePath(), e);
    } finally {
        // close down Threadpool-executor
        closeDownThreadpoolQuietly(executor);
        FileUtils.removeRecursively(new File(indexLocation));
        for (File temporaryFile : tmpfiles) {
            FileUtils.removeRecursively(temporaryFile);
        }
    }
}