Example usage for java.util.concurrent ThreadPoolExecutor submit

Introduction

On this page you can find example usage of java.util.concurrent.ThreadPoolExecutor.submit, drawn from open-source projects.

Prototype

public Future<?> submit(Runnable task) 
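
Before the project examples below, here is a minimal sketch (not taken from any of the listed projects) of how submit(Runnable) is typically used: the returned Future<?> yields null from get() once the task completes, so it is mainly useful for waiting on completion and surfacing exceptions. The pool parameters and task body are illustrative assumptions.

import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

public class SubmitRunnableExample {
    public static void main(String[] args) throws InterruptedException, ExecutionException {
        // Illustrative pool: 2 core threads, 2 max threads, unbounded work queue.
        ThreadPoolExecutor pool = new ThreadPoolExecutor(2, 2, 0L, TimeUnit.MILLISECONDS,
                new LinkedBlockingQueue<Runnable>());

        // submit(Runnable) wraps the task in a FutureTask; get() returns null on success
        // or throws ExecutionException if the task threw.
        Future<?> result = pool.submit(
                () -> System.out.println("running on " + Thread.currentThread().getName()));

        result.get(); // block until the task has finished
        pool.shutdown();
        pool.awaitTermination(10, TimeUnit.SECONDS);
    }
}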

Usage

From source file:org.deeplearning4j.models.word2vec.Word2Vec.java

/**
 * Train the model
 */
public void fit() throws IOException {
    boolean loaded = buildVocab();
    //save vocab after building
    if (!loaded && saveVocab)
        vocab().saveVocab();
    if (stopWords == null)
        readStopWords();

    log.info("Training word2vec multithreaded");

    if (sentenceIter != null)
        sentenceIter.reset();
    if (docIter != null)
        docIter.reset();

    int[] docs = vectorizer.index().allDocs();

    if (docs.length < 1) {
        vectorizer.fit();
    }

    docs = vectorizer.index().allDocs();
    if (docs.length < 1) {
        throw new IllegalStateException("No documents found");
    }

    totalWords = vectorizer.numWordsEncountered();
    if (totalWords < 1)
        throw new IllegalStateException("Unable to train, total words less than 1");

    totalWords *= numIterations;

    log.info("Processing sentences...");

    AtomicLong numWordsSoFar = new AtomicLong(0);
    final AtomicLong nextRandom = new AtomicLong(5);
    ExecutorService exec = new ThreadPoolExecutor(Runtime.getRuntime().availableProcessors(),
            Runtime.getRuntime().availableProcessors(), 0L, TimeUnit.MILLISECONDS,
            new LinkedBlockingQueue<Runnable>(), new RejectedExecutionHandler() {
                @Override
                public void rejectedExecution(Runnable r, ThreadPoolExecutor executor) {
                    try {
                        Thread.sleep(1000);
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                    }
                    executor.submit(r);
                }
            });

    final Queue<List<VocabWord>> batch2 = new ConcurrentLinkedDeque<>();
    vectorizer.index().eachDoc(new Function<List<VocabWord>, Void>() {
        @Override
        public Void apply(List<VocabWord> input) {
            List<VocabWord> batch = new ArrayList<>();
            addWords(input, nextRandom, batch);
            if (!batch.isEmpty()) {
                batch2.add(batch);
            }

            return null;
        }
    }, exec);

    exec.shutdown();
    try {
        exec.awaitTermination(1, TimeUnit.DAYS);
    } catch (InterruptedException e) {
        e.printStackTrace();
    }

    ActorSystem actorSystem = ActorSystem.create();

    for (int i = 0; i < numIterations; i++)
        doIteration(batch2, numWordsSoFar, nextRandom, actorSystem);
    actorSystem.shutdown();

}
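
The fit() method above wires a RejectedExecutionHandler into the pool so that rejected work is retried after a short sleep rather than dropped. A stripped-down sketch of that pattern follows; the pool size and the isShutdown() guard are additions of this sketch, not taken from the project code.

import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

public class ResubmitOnRejection {
    public static ThreadPoolExecutor newPool(int threads) {
        return new ThreadPoolExecutor(threads, threads, 0L, TimeUnit.MILLISECONDS,
                new LinkedBlockingQueue<Runnable>(),
                (rejected, executor) -> {
                    // Back off briefly, then resubmit instead of dropping the task.
                    try {
                        Thread.sleep(1000);
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                    }
                    if (!executor.isShutdown()) { // guard added in this sketch
                        executor.submit(rejected);
                    }
                });
    }
}

Note that with an unbounded LinkedBlockingQueue the handler only fires once the pool has been shut down, which is why the guard matters in this sketch.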

From source file:org.apache.accumulo.core.file.rfile.MultiThreadedRFileTest.java

@SuppressFBWarnings(value = "INFORMATION_EXPOSURE_THROUGH_AN_ERROR_MESSAGE", justification = "information put into error message is safe and used for testing")
@Test
public void testMultipleReaders() throws IOException {
    final List<Throwable> threadExceptions = Collections.synchronizedList(new ArrayList<Throwable>());
    Map<String, MutableInt> messages = new HashMap<>();
    Map<String, String> stackTrace = new HashMap<>();

    final TestRFile trfBase = new TestRFile(conf);

    writeData(trfBase);

    trfBase.openReader();

    try {

        validate(trfBase);

        final TestRFile trfBaseCopy = trfBase.deepCopy();

        validate(trfBaseCopy);

        // now start up multiple RFile deepcopies
        int maxThreads = 10;
        String name = "MultiThreadedRFileTestThread";
        ThreadPoolExecutor pool = new ThreadPoolExecutor(maxThreads + 1, maxThreads + 1, 5 * 60,
                TimeUnit.SECONDS, new LinkedBlockingQueue<>(), new NamingThreadFactory(name));
        pool.allowCoreThreadTimeOut(true);
        try {
            Runnable runnable = () -> {
                try {
                    TestRFile trf = trfBase;
                    synchronized (trfBaseCopy) {
                        trf = trfBaseCopy.deepCopy();
                    }
                    validate(trf);
                } catch (Throwable t) {
                    threadExceptions.add(t);
                }
            };
            for (int i = 0; i < maxThreads; i++) {
                pool.submit(runnable);
            }
        } finally {
            pool.shutdown();
            try {
                pool.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }

        for (Throwable t : threadExceptions) {
            String msg = t.getClass() + " : " + t.getMessage();
            if (!messages.containsKey(msg)) {
                messages.put(msg, new MutableInt(1));
            } else {
                messages.get(msg).increment();
            }
            StringWriter string = new StringWriter();
            PrintWriter writer = new PrintWriter(string);
            t.printStackTrace(writer);
            writer.flush();
            stackTrace.put(msg, string.getBuffer().toString());
        }
    } finally {
        trfBase.closeReader();
        trfBase.close();
    }

    for (String message : messages.keySet()) {
        LOG.error(messages.get(message) + ": " + message);
        LOG.error(stackTrace.get(message));
    }

    assertTrue(threadExceptions.isEmpty());
}

From source file:com.splicemachine.derby.stream.control.ControlDataSet.java

@Override
public DataSet<V> union(DataSet<V> dataSet) {
    ThreadPoolExecutor tpe = null;
    try {

        ThreadFactory factory = new ThreadFactoryBuilder().setNameFormat("union-begin-query-%d")
                .setUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {
                    @Override
                    public void uncaughtException(Thread t, Throwable e) {
                        e.printStackTrace();
                    }
                }).build();
        tpe = new ThreadPoolExecutor(2, 2, 60, TimeUnit.SECONDS, new SynchronousQueue<Runnable>(), factory,
                new ThreadPoolExecutor.CallerRunsPolicy());
        tpe.allowCoreThreadTimeOut(false);
        tpe.prestartAllCoreThreads();
        Future<Iterator<V>> leftSideFuture = tpe.submit(new NonLazy(iterator));
        Future<Iterator<V>> rightSideFuture = tpe.submit(new NonLazy(((ControlDataSet<V>) dataSet).iterator));

        return new ControlDataSet<>(Iterators.concat(leftSideFuture.get(), rightSideFuture.get()));
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        if (tpe != null)
            tpe.shutdown();
    }
}

From source file:org.apache.hadoop.hbase.regionserver.SplitTransactionImpl.java

/**
 * Creates reference files for top and bottom half of the split.
 * @param hstoreFilesToSplit map of store files to create half file references for.
 * @return the number of reference files that were created.
 * @throws IOException
 */
private Pair<Integer, Integer> splitStoreFiles(final Map<byte[], List<StoreFile>> hstoreFilesToSplit)
        throws IOException {
    if (hstoreFilesToSplit == null) {
        // Could be null because close didn't succeed -- for now consider it fatal
        throw new IOException("Close returned empty list of StoreFiles");
    }
    // The following code sets up a thread pool executor with as many slots as
    // there's files to split. It then fires up everything, waits for
    // completion and finally checks for any exception
    int nbFiles = 0;
    for (Map.Entry<byte[], List<StoreFile>> entry : hstoreFilesToSplit.entrySet()) {
        nbFiles += entry.getValue().size();
    }
    if (nbFiles == 0) {
        // no file needs to be splitted.
        return new Pair<Integer, Integer>(0, 0);
    }
    // Default max #threads to use is the smaller of table's configured number of blocking store
    // files or the available number of logical cores.
    int defMaxThreads = Math.min(
            parent.conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT),
            Runtime.getRuntime().availableProcessors());
    // Max #threads is the smaller of the number of storefiles or the default max determined above.
    int maxThreads = Math.min(parent.conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX, defMaxThreads), nbFiles);
    LOG.info("Preparing to split " + nbFiles + " storefiles for region " + this.parent + " using " + maxThreads
            + " threads");
    ThreadFactoryBuilder builder = new ThreadFactoryBuilder();
    builder.setNameFormat("StoreFileSplitter-%1$d");
    ThreadFactory factory = builder.build();
    ThreadPoolExecutor threadPool = (ThreadPoolExecutor) Executors.newFixedThreadPool(maxThreads, factory);
    List<Future<Pair<Path, Path>>> futures = new ArrayList<Future<Pair<Path, Path>>>(nbFiles);

    // Split each store file.
    for (Map.Entry<byte[], List<StoreFile>> entry : hstoreFilesToSplit.entrySet()) {
        for (StoreFile sf : entry.getValue()) {
            StoreFileSplitter sfs = new StoreFileSplitter(entry.getKey(), sf);
            futures.add(threadPool.submit(sfs));
        }
    }
    // Shutdown the pool
    threadPool.shutdown();

    // Wait for all the tasks to finish
    try {
        boolean stillRunning = !threadPool.awaitTermination(this.fileSplitTimeout, TimeUnit.MILLISECONDS);
        if (stillRunning) {
            threadPool.shutdownNow();
            // wait for the thread to shutdown completely.
            while (!threadPool.isTerminated()) {
                Thread.sleep(50);
            }
            throw new IOException(
                    "Took too long to split the" + " files and create the references, aborting split");
        }
    } catch (InterruptedException e) {
        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
    }

    int created_a = 0;
    int created_b = 0;
    // Look for any exception
    for (Future<Pair<Path, Path>> future : futures) {
        try {
            Pair<Path, Path> p = future.get();
            created_a += p.getFirst() != null ? 1 : 0;
            created_b += p.getSecond() != null ? 1 : 0;
        } catch (InterruptedException e) {
            throw (InterruptedIOException) new InterruptedIOException().initCause(e);
        } catch (ExecutionException e) {
            throw new IOException(e);
        }
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("Split storefiles for region " + this.parent + " Daughter A: " + created_a
                + " storefiles, Daughter B: " + created_b + " storefiles.");
    }
    return new Pair<Integer, Integer>(created_a, created_b);
}

From source file:org.apache.hadoop.hbase.regionserver.IndexSplitTransaction.java

private void splitStoreFiles(final Map<byte[], List<StoreFile>> hstoreFilesToSplit) throws IOException {
    if (hstoreFilesToSplit == null) {
        // Could be null because close didn't succeed -- for now consider it fatal
        throw new IOException("Close returned empty list of StoreFiles");
    }
    // The following code sets up a thread pool executor with as many slots as
    // there's files to split. It then fires up everything, waits for
    // completion and finally checks for any exception
    int nbFiles = hstoreFilesToSplit.size();
    if (nbFiles == 0) {
        // no file needs to be splitted.
        return;
    }
    ThreadFactoryBuilder builder = new ThreadFactoryBuilder();
    builder.setNameFormat("StoreFileSplitter-%1$d");
    ThreadFactory factory = builder.build();
    ThreadPoolExecutor threadPool = (ThreadPoolExecutor) Executors.newFixedThreadPool(nbFiles, factory);
    List<Future<Void>> futures = new ArrayList<Future<Void>>(nbFiles);

    // Split each store file.
    for (Map.Entry<byte[], List<StoreFile>> entry : hstoreFilesToSplit.entrySet()) {
        for (StoreFile sf : entry.getValue()) {
            StoreFileSplitter sfs = new StoreFileSplitter(entry.getKey(), sf);
            futures.add(threadPool.submit(sfs));
        }
    }
    // Shutdown the pool
    threadPool.shutdown();

    // Wait for all the tasks to finish
    try {
        boolean stillRunning = !threadPool.awaitTermination(this.fileSplitTimeout, TimeUnit.MILLISECONDS);
        if (stillRunning) {
            threadPool.shutdownNow();
            // wait for the thread to shutdown completely.
            while (!threadPool.isTerminated()) {
                Thread.sleep(50);
            }
            throw new IOException(
                    "Took too long to split the" + " files and create the references, aborting split");
        }
    } catch (InterruptedException e) {
        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
    }

    // Look for any exception
    for (Future<Void> future : futures) {
        try {
            future.get();
        } catch (InterruptedException e) {
            throw (InterruptedIOException) new InterruptedIOException().initCause(e);
        } catch (ExecutionException e) {
            throw new IOException(e);
        }
    }
}

From source file:org.apache.hadoop.hbase.client.TestHCM.java

/**
 * Tests that a destroyed connection does not have a live zookeeper.
 * Below is timing based.  We put up a connection to a table and then close the connection while
 * having a background thread running that is forcing close of the connection to try and
 * provoke a close catastrophe; we are hoping for a car crash so we can see if we are leaking
 * zk connections.
 * @throws Exception
 */
@Ignore("Flakey test: See HBASE-8996")
@Test
public void testDeleteForZKConnLeak() throws Exception {
    TEST_UTIL.createTable(TABLE_NAME4, FAM_NAM);
    final Configuration config = HBaseConfiguration.create(TEST_UTIL.getConfiguration());
    config.setInt("zookeeper.recovery.retry", 1);
    config.setInt("zookeeper.recovery.retry.intervalmill", 1000);
    config.setInt("hbase.rpc.timeout", 2000);
    config.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);

    ThreadPoolExecutor pool = new ThreadPoolExecutor(1, 10, 5, TimeUnit.SECONDS,
            new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("test-hcm-delete"));

    pool.submit(new Runnable() {
        @Override
        public void run() {
            while (!Thread.interrupted()) {
                try {
                    HConnection conn = HConnectionManager.getConnection(config);
                    LOG.info("Connection " + conn);
                    HConnectionManager.deleteStaleConnection(conn);
                    LOG.info("Connection closed " + conn);
                    // TODO: This sleep time should be less than the time that it takes to open and close
                    // a table.  Ideally we would do a few runs first to measure.  For now this is
                    // timing based; hopefully we hit the bad condition.
                    Threads.sleep(10);
                } catch (Exception e) {
                }
            }
        }
    });

    // Use connection multiple times.
    for (int i = 0; i < 30; i++) {
        HConnection c1 = null;
        try {
            c1 = ConnectionManager.getConnectionInternal(config);
            LOG.info("HTable connection " + i + " " + c1);
            HTable table = new HTable(config, TABLE_NAME4, pool);
            table.close();
            LOG.info("HTable connection " + i + " closed " + c1);
        } catch (Exception e) {
            LOG.info("We actually want this to happen!!!!  So we can see if we are leaking zk", e);
        } finally {
            if (c1 != null) {
                if (c1.isClosed()) {
                    // cannot use getZooKeeper as method instantiates watcher if null
                    Field zkwField = c1.getClass().getDeclaredField("keepAliveZookeeper");
                    zkwField.setAccessible(true);
                    Object watcher = zkwField.get(c1);

                    if (watcher != null) {
                        if (((ZooKeeperWatcher) watcher).getRecoverableZooKeeper().getState().isAlive()) {
                            // non-synchronized access to watcher; sleep and check again in case zk connection
                            // hasn't been cleaned up yet.
                            Thread.sleep(1000);
                            if (((ZooKeeperWatcher) watcher).getRecoverableZooKeeper().getState().isAlive()) {
                                pool.shutdownNow();
                                fail("Live zookeeper in closed connection");
                            }
                        }
                    }
                }
                c1.close();
            }
        }
    }
    pool.shutdownNow();
}

From source file:core.Task.java

private void runNodeTask() {

    /* Single node id */
    String runOnNode = this.coordinates.get("runOnNode");

    /*
     * Get custom user variables
     */
    ApiRequest variablesRequest = new ApiRequest(this.coordinates).setRequestMethod(ApiRequestMethods.GET)
            .setApiMethod("v1/core/get-variables");

    ApiResponse variablesResponse = ApiCaller.request(variablesRequest);

    if (!variablesResponse.success) {
        /*
         * Log record
         * Can't get variables
         */
        this.logSystemBadResponse("ERROR", "TASK GET CUSTOM VARIABLES", "Can't get task variables from API.",
                variablesResponse);
        return;
    }

    String variablesJson = variablesResponse.response;

    Type variablesType = new TypeToken<HashMap<String, String>>() {
    }.getType();
    Map<String, String> customVariables;

    try {
        customVariables = gson.fromJson(variablesJson, variablesType);

        /*
         * Setting hashMap of custom user variables
         */
        for (Map.Entry<String, String> curVar : customVariables.entrySet()) {
            DTOVariableConvertResult curVariableObject = new DTOVariableConvertResult();
            curVariableObject.setAction("process");
            curVariableObject.setStatus("success");
            curVariableObject.setVariableName(curVar.getKey());
            curVariableObject.setVariableValue(curVar.getValue());
            curVariableObject.setResult(curVar.getValue());
            this.variables.put(curVar.getKey(), curVariableObject);
        }

    } catch (Exception e) {
        this.logSystemException("ERROR", "TASK GET CUSTOM VARIABLES", "Can't parse variables list from json.",
                e);
        return;
    }

    /*
     * Add Date variable to variables hashMap
     */
    try {
        DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
        String date = dateFormat.format(new Date());

        DTOVariableConvertResult dateVariableObject = new DTOVariableConvertResult();
        dateVariableObject.setAction("process");
        dateVariableObject.setStatus("success");
        dateVariableObject.setVariableName("%%DATE%%");
        dateVariableObject.setVariableValue(date);
        dateVariableObject.setResult(date);
        this.variables.put("%%DATE%%", dateVariableObject);
    } catch (Exception e) {
        this.logSystemException("ERROR", "TASK GET CUSTOM VARIABLES", "Can't set date variable.", e);
        return;
    }

    /*
     * Run node task on nodes scope
     */
    if (runOnNode == null) {

        /*
         * Get nodes with workers by task
         */
        Map<String, String> params = new HashMap<>();
        params.put("schedule_id", this.coordinates.get("scheduleId"));
        params.put("task_name", this.coordinates.get("taskName"));

        ApiRequest request = new ApiRequest(this.coordinates).setRequestMethod(ApiRequestMethods.GET)
                .setApiMethod("v1/core/get-nodes-workers-by-task").setParams(params);

        ApiResponse nodesResponse = ApiCaller.request(request);

        if (!nodesResponse.success) {
            /*
             * Log record
             * Can't get node list
             */
            this.logBadResponse("ERROR", "TASK GET NODES",
                    "Task " + this.coordinates.get("taskName") + " can't get node list from API.",
                    nodesResponse);
            return;
        }

        String nodesJson = nodesResponse.response;

        Type nodesType = new TypeToken<HashMap<String, HashMap<String, String>>>() {
        }.getType();

        try {

            this.nodes = gson.fromJson(nodesJson, nodesType);

        } catch (JsonSyntaxException e) {
            this.logException("ERROR", "TASK GET NODES",
                    "Task " + this.coordinates.get("taskName") + " can't parse nodes list from json.", e);
            return;
        }
    } else {
        /*  Run node task on single node (on demand) */

        /*
         * Get worker by node id
         */
        Map<String, String> params = new HashMap<>();
        params.put("node_id", runOnNode);
        params.put("task_name", this.coordinates.get("taskName"));

        ApiRequest nodeRequest = new ApiRequest(this.coordinates).setRequestMethod(ApiRequestMethods.GET)
                .setApiMethod("v1/core/get-worker-by-node-id").setParams(params);

        ApiResponse nodeResponse = ApiCaller.request(nodeRequest);

        if (!nodeResponse.success) {
            /*
             * Log record
             * Can't get node list
             */
            this.logBadResponse("ERROR", "TASK GET NODES",
                    "Task " + this.coordinates.get("taskName") + " can't get node list from API.",
                    nodeResponse);
            return;
        }

        String nodeJson = nodeResponse.response;

        Type nodeType = new TypeToken<HashMap<String, HashMap<String, String>>>() {
        }.getType();

        try {

            this.nodes = gson.fromJson(nodeJson, nodeType);

        } catch (JsonSyntaxException e) {
            this.logException("ERROR", "TASK GET NODES",
                    "Task " + this.coordinates.get("taskName") + " can't parse nodes list from json.", e);
            return;
        }
    }

    /*
     * Thread executor init
     */
    ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(threadCount); // number of threads

    /*
     * Futures for workers results return
     */
    List<Future<Boolean>> results = new ArrayList<>();

    // Add Workers to Executor
    // noinspection Java8MapForEach
    this.nodes.entrySet().forEach(node -> {
        Map<String, String> currentCoord = new HashMap<>();
        currentCoord.putAll(this.coordinates);
        currentCoord.put("nodeId", node.getKey());
        currentCoord.put("workerId", node.getValue().get("id"));
        currentCoord.put("nodeIp", node.getValue().get("ip"));

        currentCoord.put("nodeVendor", node.getValue().get("vendor"));
        currentCoord.put("nodeModel", node.getValue().get("model"));

        switch (node.getValue().get("get")) {
        case "snmp":
            results.add(executor.submit(new WorkerSnmp(currentCoord, this.settings, this.variables)));
            break;
        case "telnet":
            results.add(executor.submit(new WorkerTelnet(currentCoord, this.settings, this.variables)));
            break;
        case "ssh":
            results.add(executor.submit(new WorkerSsh(currentCoord, this.settings, this.variables)));
            break;
        default:
            String unknownProtocol = "Task " + this.coordinates.get("taskName") + " has unknown protocol "
                    + node.getValue().get("get") + ". Node id: " + node.getKey();
            this.logMessage("ERROR", "WORKER SPAWN", unknownProtocol);
        }
    });

    for (Future<Boolean> result : results) {

        Boolean currentResult;

        try {
            currentResult = result.get();

            if (currentResult) {
                this.success++;
            } else {
                this.failed++;
            }

        } catch (Exception e) {
            this.logException("ERROR", "TASK GET WORKER RESPONSE", "Task " + this.coordinates.get("taskName")
                    + " was interrupted while waiting for worker result.", e);
            return;
        }
    }

    executor.shutdown();

    /*
     * Task finish log
     */
    String finalMessage = "Task " + this.coordinates.get("taskName") + " has been finished. " + "Nodes: "
            + this.nodes.size() + ". Success: " + this.success + ". Failed: " + this.failed + ".";
    this.logMessage("INFO", "TASK FINISH", finalMessage);

}

From source file:org.apache.hadoop.hbase.master.procedure.SplitTableRegionProcedure.java

/**
 * Create Split directory
 * @param env MasterProcedureEnv
 * @throws IOException
 */
private Pair<Integer, Integer> splitStoreFiles(final MasterProcedureEnv env, final HRegionFileSystem regionFs)
        throws IOException {
    final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
    final Configuration conf = env.getMasterConfiguration();

    // The following code sets up a thread pool executor with as many slots as
    // there's files to split. It then fires up everything, waits for
    // completion and finally checks for any exception
    //
    // Note: splitStoreFiles creates daughter region dirs under the parent splits dir
    // Nothing to unroll here if failure -- re-run createSplitsDir will
    // clean this up.
    int nbFiles = 0;
    for (String family : regionFs.getFamilies()) {
        Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(family);
        if (storeFiles != null) {
            nbFiles += storeFiles.size();
        }
    }
    if (nbFiles == 0) {
        // no file needs to be splitted.
        return new Pair<Integer, Integer>(0, 0);
    }
    // Default max #threads to use is the smaller of table's configured number of blocking store
    // files or the available number of logical cores.
    int defMaxThreads = Math.min(
            conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT),
            Runtime.getRuntime().availableProcessors());
    // Max #threads is the smaller of the number of storefiles or the default max determined above.
    int maxThreads = Math.min(conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX, defMaxThreads), nbFiles);
    LOG.info("Preparing to split " + nbFiles + " storefiles for region " + parentHRI + " using " + maxThreads
            + " threads");
    ThreadPoolExecutor threadPool = (ThreadPoolExecutor) Executors.newFixedThreadPool(maxThreads,
            Threads.getNamedThreadFactory("StoreFileSplitter-%1$d"));
    List<Future<Pair<Path, Path>>> futures = new ArrayList<Future<Pair<Path, Path>>>(nbFiles);

    // Split each store file.
    final HTableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
    for (String family : regionFs.getFamilies()) {
        final HColumnDescriptor hcd = htd.getFamily(family.getBytes());
        final Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(family);
        if (storeFiles != null && storeFiles.size() > 0) {
            final CacheConfig cacheConf = new CacheConfig(conf, hcd);
            for (StoreFileInfo storeFileInfo : storeFiles) {
                StoreFileSplitter sfs = new StoreFileSplitter(regionFs, family.getBytes(), new StoreFile(
                        mfs.getFileSystem(), storeFileInfo, conf, cacheConf, hcd.getBloomFilterType()));
                futures.add(threadPool.submit(sfs));
            }
        }
    }
    // Shutdown the pool
    threadPool.shutdown();

    // Wait for all the tasks to finish
    long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout", 30000);
    try {
        boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS);
        if (stillRunning) {
            threadPool.shutdownNow();
            // wait for the thread to shutdown completely.
            while (!threadPool.isTerminated()) {
                Thread.sleep(50);
            }
            throw new IOException(
                    "Took too long to split the" + " files and create the references, aborting split");
        }
    } catch (InterruptedException e) {
        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
    }

    int daughterA = 0;
    int daughterB = 0;
    // Look for any exception
    for (Future<Pair<Path, Path>> future : futures) {
        try {
            Pair<Path, Path> p = future.get();
            daughterA += p.getFirst() != null ? 1 : 0;
            daughterB += p.getSecond() != null ? 1 : 0;
        } catch (InterruptedException e) {
            throw (InterruptedIOException) new InterruptedIOException().initCause(e);
        } catch (ExecutionException e) {
            throw new IOException(e);
        }
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("Split storefiles for region " + parentHRI + " Daughter A: " + daughterA
                + " storefiles, Daughter B: " + daughterB + " storefiles.");
    }
    return new Pair<Integer, Integer>(daughterA, daughterB);
}

From source file:org.hyperic.hq.measurement.agent.server.ScheduleThread.java

private void collect(ResourceSchedule rs, List<ScheduledMeasurement> items) {
    final boolean debug = log.isDebugEnabled();
    for (int i = 0; (i < items.size()) && (!shouldDie.get()); i++) {
        ScheduledMeasurement meas = items.get(i);
        ParsedTemplate tmpl = toParsedTemplate(meas);
        if (tmpl == null) {
            log.warn("template for meas id=" + meas.getDerivedID() + " is null");
            continue;
        }
        ThreadPoolExecutor executor;
        String plugin;
        synchronized (executors) {
            try {
                GenericPlugin p = manager.getPlugin(tmpl.plugin).getProductPlugin();
                plugin = p.getName();
            } catch (PluginNotFoundException e) {
                if (debug) {
                    log.debug("Could not find plugin name from template '" + tmpl.plugin
                            + "'. Associated plugin might not be initialized yet.");
                }
                continue;
            }
            executor = executors.get(plugin);
            if (executor == null) {
                final int poolSize = getPoolSize(plugin);
                final int queueSize = getQueueSize(plugin);
                log.info("Creating executor for plugin '" + plugin + "' with a poolsize=" + poolSize
                        + " queuesize=" + queueSize);
                final ThreadFactory factory = getFactory(plugin);
                executor = new ThreadPoolExecutor(poolSize, poolSize, 60, TimeUnit.SECONDS,
                        new LinkedBlockingQueue<Runnable>(queueSize), factory,
                        new ThreadPoolExecutor.AbortPolicy());
                executors.put(plugin, executor);
            }
        }
        MetricTask metricTask = new MetricTask(rs, meas);
        statsCollector.addStat(1, SCHEDULE_THREAD_METRIC_TASKS_SUBMITTED);
        try {
            Future<?> task = executor.submit(metricTask);
            synchronized (metricCollections) {
                metricCollections.put(task, metricTask);
            }
        } catch (RejectedExecutionException e) {
            log.warn("Executor[" + plugin + "] rejected metric task " + metricTask);
            statNumMetricsFailed++;
        }
    }
}

From source file:dk.netarkivet.harvester.indexserver.CrawlLogIndexCache.java

/** Combine a number of crawl.log files into one Lucene index.  This index
 * is placed as gzip files under the directory returned by getCacheFile().
 *
 * @param rawfiles The map from job ID into crawl.log contents. No
 * null values are allowed in this map.
 */
protected void combine(Map<Long, File> rawfiles) {
    indexingJobCount++;
    long datasetSize = rawfiles.values().size();
    log.info("Starting combine task #" + indexingJobCount + ". This combines a dataset with " + datasetSize
            + " crawl logs (thread = " + Thread.currentThread().getName() + ")");

    File resultDir = getCacheFile(rawfiles.keySet());
    Set<File> tmpfiles = new HashSet<File>();
    String indexLocation = resultDir.getAbsolutePath() + ".luceneDir";
    ThreadPoolExecutor executor = null;
    try {
        DigestIndexer indexer = createStandardIndexer(indexLocation);
        final boolean verboseIndexing = false;
        DigestOptions indexingOptions = new DigestOptions(this.useBlacklist, verboseIndexing, this.mimeFilter);
        long count = 0;
        Set<IndexingState> outstandingJobs = new HashSet<IndexingState>();
        final int maxThreads = Settings.getInt(HarvesterSettings.INDEXSERVER_INDEXING_MAXTHREADS);
        executor = new ThreadPoolExecutor(maxThreads, maxThreads, 0L, TimeUnit.MILLISECONDS,
                new LinkedBlockingQueue<Runnable>());

        executor.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());

        for (Map.Entry<Long, File> entry : rawfiles.entrySet()) {
            Long jobId = entry.getKey();
            File crawlLog = entry.getValue();
            // Generate UUID to ensure a unique filedir for the index.
            File tmpFile = new File(FileUtils.getTempDir(), UUID.randomUUID().toString());
            tmpfiles.add(tmpFile);
            String localindexLocation = tmpFile.getAbsolutePath();
            Long cached = cdxcache.cache(jobId);
            if (cached == null) {
                log.warn("Skipping the ingest of logs for job " + entry.getKey()
                        + ". Unable to retrieve cdx-file for job.");
                continue;
            }
            File cachedCDXFile = cdxcache.getCacheFile(cached);

            // Dispatch this indexing task to a separate thread that 
            // handles the sorting of the logfiles and the generation
            // of a lucene index for this crawllog and cdxfile.
            count++;
            String taskID = count + " out of " + datasetSize;
            log.debug("Making subthread for indexing job " + jobId + " - task " + taskID);
            Callable<Boolean> task = new DigestIndexerWorker(localindexLocation, jobId, crawlLog, cachedCDXFile,
                    indexingOptions, taskID);
            Future<Boolean> result = executor.submit(task);
            outstandingJobs.add(new IndexingState(jobId, localindexLocation, result));
        }

        // wait for all the outstanding subtasks to complete.
        Set<Directory> subindices = new HashSet<Directory>();

        // Deadline for the combine-task
        long combineTimeout = Settings.getLong(HarvesterSettings.INDEXSERVER_INDEXING_TIMEOUT);
        long timeOutTime = System.currentTimeMillis() + combineTimeout;

        // The indexwriter for the totalindex.
        IndexWriter totalIndex = indexer.getIndex();
        int subindicesInTotalIndex = 0;
        // Max number of segments in totalindex.
        int maxSegments = Settings.getInt(HarvesterSettings.INDEXSERVER_INDEXING_MAX_SEGMENTS);

        final int ACCUMULATED_SUBINDICES_BEFORE_MERGING = 200;

        while (outstandingJobs.size() > 0) {
            log.info("Outstanding jobs in combine task #" + indexingJobCount + " is now "
                    + outstandingJobs.size());
            Iterator<IndexingState> iterator = outstandingJobs.iterator();
            if (timeOutTime < System.currentTimeMillis()) {
                log.warn("Max indexing time exceeded for one index ("
                        + TimeUtils.readableTimeInterval(combineTimeout) + "). Indexing stops here, although"
                        + " missing subindices for " + outstandingJobs.size() + " jobs");
                break;
            }
            while (iterator.hasNext() && subindices.size() < ACCUMULATED_SUBINDICES_BEFORE_MERGING) {
                Future<Boolean> nextResult;
                IndexingState next = iterator.next();
                if (next.getResultObject().isDone()) {
                    nextResult = next.getResultObject();
                    try {
                        // check, if the indexing failed
                        if (nextResult.get()) {
                            subindices.add(new SimpleFSDirectory(new File(next.getIndex())));
                        } else {
                            log.warn("Indexing of job " + next.getJobIdentifier() + " failed.");
                        }

                    } catch (InterruptedException e) {
                        log.warn("Unable to get Result back from " + "indexing thread", e);
                    } catch (ExecutionException e) {
                        log.warn("Unable to get Result back from " + "indexing thread", e);
                    }
                    //remove the done object from the set
                    iterator.remove();
                }
            }

            if (subindices.size() >= ACCUMULATED_SUBINDICES_BEFORE_MERGING) {

                log.info("Adding " + subindices.size()
                        + " subindices to main index. Forcing index to contain max " + maxSegments
                        + " files (related to combine task # " + indexingJobCount + ")");
                totalIndex.addIndexes(subindices.toArray(new Directory[0]));
                totalIndex.forceMerge(maxSegments);
                totalIndex.commit();
                for (Directory luceneDir : subindices) {
                    luceneDir.close();
                }
                subindicesInTotalIndex += subindices.size();
                log.info("Completed adding " + subindices.size() + " subindices to main index, now containing "
                        + subindicesInTotalIndex + " subindices" + "(related to combine task # "
                        + indexingJobCount + ")");
                subindices.clear();
            } else {
                sleepAwhile();
            }
        }

        log.info("Adding the final " + subindices.size()
                + " subindices to main index. Forcing index to contain max " + maxSegments + " files "
                + "(related to combine task # " + indexingJobCount + ")");

        totalIndex.addIndexes(subindices.toArray(new Directory[0]));
        totalIndex.forceMerge(maxSegments);
        totalIndex.commit();
        for (Directory luceneDir : subindices) {
            luceneDir.close();
        }
        subindices.clear();

        log.info("Adding operation completed (combine task # " + indexingJobCount + ")!");
        long docsInIndex = totalIndex.numDocs();

        indexer.close();
        log.info("Closed index (related to combine task # " + indexingJobCount);

        // Now the index is made, gzip it up.
        File totalIndexDir = new File(indexLocation);
        log.info("Gzip-compressing the individual " + totalIndexDir.list().length
                + " index files of combine task # " + indexingJobCount);
        ZipUtils.gzipFiles(totalIndexDir, resultDir);
        log.info("Completed combine task # " + indexingJobCount + " that combined a dataset with " + datasetSize
                + " crawl logs (entries in combined index: " + docsInIndex + ") - compressed index has size "
                + FileUtils.getHumanReadableFileSize(resultDir));
    } catch (IOException e) {
        throw new IOFailure("Error setting up craw.log index framework for " + resultDir.getAbsolutePath(), e);
    } finally {
        // close down Threadpool-executor
        closeDownThreadpoolQuietly(executor);
        FileUtils.removeRecursively(new File(indexLocation));
        for (File temporaryFile : tmpfiles) {
            FileUtils.removeRecursively(temporaryFile);
        }
    }
}