List of usage examples for java.util.concurrent Semaphore acquireUninterruptibly
public void acquireUninterruptibly()
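java.util.concurrent.Semaphore.acquireUninterruptibly() acquires a permit from the semaphore, blocking until one is available. Unlike acquire(), it does not throw InterruptedException: if the thread is interrupted while waiting it keeps waiting, and the thread's interrupt status is set when the method returns. Before the real-world examples below, here is a minimal, self-contained sketch of the common pattern of bounding concurrency with acquireUninterruptibly() and releasing the permit in a finally block (the class name, pool size, and task bodies are illustrative and not taken from any of the projects below):

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;

public class AcquireUninterruptiblyDemo {
  // At most 3 tasks may hold a permit (i.e. run the guarded section) at once.
  private static final Semaphore PERMITS = new Semaphore(3);

  public static void main(String[] args) throws InterruptedException {
    ExecutorService pool = Executors.newFixedThreadPool(10);
    for (int i = 0; i < 10; i++) {
      final int id = i;
      pool.execute(() -> {
        PERMITS.acquireUninterruptibly(); // blocks for a permit, never throws InterruptedException
        try {
          System.out.println("task " + id + " holds a permit");
          TimeUnit.MILLISECONDS.sleep(100); // stand-in for real work
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt(); // restore interrupt status
        } finally {
          PERMITS.release(); // always hand the permit back
        }
      });
    }
    pool.shutdown();
    pool.awaitTermination(10, TimeUnit.SECONDS);
  }
}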
From source file:co.paralleluniverse.photon.Photon.java
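In this load generator, a Semaphore sized to maxconnections throttles the request fibers: the main loop calls acquireUninterruptibly() before spawning each fiber, and the fiber releases the permit in its finally block once the HTTP exchange completes.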
public static void main(final String[] args) throws InterruptedException, IOException {
  final Options options = new Options();
  options.addOption("rate", true, "Requests per second (default " + rateDefault + ")");
  options.addOption("duration", true, "Minimum test duration in seconds: will wait for <duration> * <rate> requests to terminate or, if progress check enabled, no progress after <duration> (default " + durationDefault + ")");
  options.addOption("maxconnections", true, "Maximum number of open connections (default " + maxConnectionsDefault + ")");
  options.addOption("timeout", true, "Connection and read timeout in millis (default " + timeoutDefault + ")");
  options.addOption("print", true, "Print cycle in millis, 0 to disable intermediate statistics (default " + printCycleDefault + ")");
  options.addOption("check", true, "Progress check cycle in millis, 0 to disable progress check (default " + checkCycleDefault + ")");
  options.addOption("stats", false, "Print full statistics when finish (default false)");
  options.addOption("minmax", false, "Print min/mean/stddev/max stats when finish (default false)");
  options.addOption("name", true, "Test name to print in the statistics (default '" + testNameDefault + "')");
  options.addOption("help", false, "Print help");

  try {
    final CommandLine cmd = new BasicParser().parse(options, args);
    final String[] ar = cmd.getArgs();
    if (cmd.hasOption("help") || ar.length != 1)
      printUsageAndExit(options);

    final String url = ar[0];
    final int timeout = Integer.parseInt(cmd.getOptionValue("timeout", timeoutDefault));
    final int maxConnections = Integer.parseInt(cmd.getOptionValue("maxconnections", maxConnectionsDefault));
    final int duration = Integer.parseInt(cmd.getOptionValue("duration", durationDefault));
    final int printCycle = Integer.parseInt(cmd.getOptionValue("print", printCycleDefault));
    final int checkCycle = Integer.parseInt(cmd.getOptionValue("check", checkCycleDefault));
    final String testName = cmd.getOptionValue("name", testNameDefault);
    final int rate = Integer.parseInt(cmd.getOptionValue("rate", rateDefault));

    final MetricRegistry metrics = new MetricRegistry();
    final Meter requestMeter = metrics.meter("request");
    final Meter responseMeter = metrics.meter("response");
    final Meter errorsMeter = metrics.meter("errors");

    final Logger log = LoggerFactory.getLogger(Photon.class);
    final ConcurrentHashMap<String, AtomicInteger> errors = new ConcurrentHashMap<>();
    final HttpGet request = new HttpGet(url);
    final StripedTimeSeries<Long> sts = new StripedTimeSeries<>(30000, false);
    final StripedHistogram sh = new StripedHistogram(60000, 5);

    log.info("name: " + testName + " url:" + url + " rate:" + rate + " duration:" + duration
        + " maxconnections:" + maxConnections + ", " + "timeout:" + timeout);

    final DefaultConnectingIOReactor ioreactor = new DefaultConnectingIOReactor(IOReactorConfig.custom()
        .setConnectTimeout(timeout).setIoThreadCount(10).setSoTimeout(timeout).build());

    Runtime.getRuntime().addShutdownHook(new Thread(() -> {
      final List<ExceptionEvent> events = ioreactor.getAuditLog();
      if (events != null)
        events.stream().filter(event -> event != null).forEach(event -> {
          System.err.println("Apache Async HTTP Client I/O Reactor Error Time: " + event.getTimestamp());
          //noinspection ThrowableResultOfMethodCallIgnored
          if (event.getCause() != null)
            //noinspection ThrowableResultOfMethodCallIgnored
            event.getCause().printStackTrace();
        });
      if (cmd.hasOption("stats"))
        printFinishStatistics(errorsMeter, sts, sh, testName);
      if (!errors.keySet().isEmpty())
        errors.entrySet().stream()
            .forEach(p -> log.info(testName + " " + p.getKey() + " " + p.getValue() + "ms"));
      System.out.println(testName + " responseTime(90%): " + sh.getHistogramData().getValueAtPercentile(90) + "ms");
      if (cmd.hasOption("minmax")) {
        final HistogramData hd = sh.getHistogramData();
        System.out.format("%s %8s%8s%8s%8s\n", testName, "min", "mean", "sd", "max");
        System.out.format("%s %8d%8.2f%8.2f%8d\n", testName, hd.getMinValue(), hd.getMean(),
            hd.getStdDeviation(), hd.getMaxValue());
      }
    }));

    final PoolingNHttpClientConnectionManager mngr = new PoolingNHttpClientConnectionManager(ioreactor);
    mngr.setDefaultMaxPerRoute(maxConnections);
    mngr.setMaxTotal(maxConnections);
    final CloseableHttpAsyncClient ahc = HttpAsyncClientBuilder.create().setConnectionManager(mngr)
        .setDefaultRequestConfig(RequestConfig.custom().setLocalAddress(null).build()).build();

    try (final CloseableHttpClient client = new FiberHttpClient(ahc)) {
      final int num = duration * rate;
      final CountDownLatch cdl = new CountDownLatch(num);
      final Semaphore sem = new Semaphore(maxConnections);
      final RateLimiter rl = RateLimiter.create(rate);

      spawnStatisticsThread(printCycle, cdl, log, requestMeter, responseMeter, errorsMeter, testName);

      for (int i = 0; i < num; i++) {
        rl.acquire();
        if (sem.availablePermits() == 0)
          log.debug("Maximum connections count reached, waiting...");
        sem.acquireUninterruptibly();

        new Fiber<Void>(() -> {
          requestMeter.mark();
          final long start = System.nanoTime();
          try {
            try (final CloseableHttpResponse ignored = client.execute(request)) {
              responseMeter.mark();
            } catch (final Throwable t) {
              markError(errorsMeter, errors, t);
            }
          } catch (final Throwable t) {
            markError(errorsMeter, errors, t);
          } finally {
            final long now = System.nanoTime();
            final long millis = TimeUnit.NANOSECONDS.toMillis(now - start);
            sts.record(start, millis);
            sh.recordValue(millis);
            sem.release();
            cdl.countDown();
          }
        }).start();
      }
      spawnProgressCheckThread(log, duration, checkCycle, cdl);
      cdl.await();
    }
  } catch (final ParseException ex) {
    System.err.println("Parsing failed. Reason: " + ex.getMessage());
  }
}
From source file:net.sourceforge.fullsync.cli.Main.java
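Here a Semaphore created with zero permits parks the main thread: in daemon mode acquireUninterruptibly() blocks until the JVM shutdown hook completes an orderly shutdown and releases the permit.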
public static void startup(String[] args, Launcher launcher) throws Exception {
  initOptions();
  String configDir = getConfigDir();
  CommandLineParser parser = new DefaultParser();
  CommandLine line = null;
  try {
    line = parser.parse(options, args);
  } catch (ParseException ex) {
    System.err.println(ex.getMessage());
    printHelp();
    System.exit(1);
  }
  if (line.hasOption('V')) {
    System.out.println(String.format("FullSync version %s", Util.getFullSyncVersion())); //$NON-NLS-1$
    System.exit(0);
  }
  // Apply modifying options
  if (!line.hasOption("v")) { //$NON-NLS-1$
    System.setErr(new PrintStream(new FileOutputStream(getLogFileName())));
  }
  if (line.hasOption("h")) { //$NON-NLS-1$
    printHelp();
    System.exit(0);
  }
  upgradeLegacyPreferencesLocation(configDir);
  String profilesFile;
  if (line.hasOption("P")) { //$NON-NLS-1$
    profilesFile = line.getOptionValue("P"); //$NON-NLS-1$
  } else {
    profilesFile = configDir + FullSync.PROFILES_XML;
    upgradeLegacyProfilesXmlLocation(profilesFile);
  }
  final String prefrencesFile = configDir + FullSync.PREFERENCES_PROPERTIES;
  final Injector injector = Guice.createInjector(new FullSyncModule(line, prefrencesFile));
  final RuntimeConfiguration rtConfig = injector.getInstance(RuntimeConfiguration.class);
  injector.getInstance(ProfileManager.class).setProfilesFileName(profilesFile);
  final ScheduledExecutorService scheduledExecutorService = injector.getInstance(ScheduledExecutorService.class);
  final EventListener deadEventListener = new EventListener() {
    private final Logger logger = LoggerFactory.getLogger("DeadEventLogger"); //$NON-NLS-1$

    @Subscribe
    private void onDeadEvent(DeadEvent deadEvent) {
      if (!(deadEvent.getEvent() instanceof ShutdownEvent)) {
        logger.warn("Dead event triggered: {}", deadEvent); //$NON-NLS-1$
      }
    }
  };
  final EventBus eventBus = injector.getInstance(EventBus.class);
  eventBus.register(deadEventListener);
  final Semaphore sem = new Semaphore(0);
  Runtime.getRuntime().addShutdownHook(new Thread(() -> {
    Logger logger = LoggerFactory.getLogger(Main.class);
    logger.debug("shutdown hook called, starting orderly shutdown"); //$NON-NLS-1$
    eventBus.post(new ShutdownEvent());
    scheduledExecutorService.shutdown();
    try {
      scheduledExecutorService.awaitTermination(5, TimeUnit.MINUTES);
    } catch (InterruptedException e) {
      // not relevant
    }
    logger.debug("shutdown hook finished, releaseing main thread semaphore"); //$NON-NLS-1$
    sem.release();
  }));
  if (rtConfig.isDaemon().orElse(false).booleanValue() || rtConfig.getProfileToRun().isPresent()) {
    finishStartup(injector);
    sem.acquireUninterruptibly();
    System.exit(0);
  } else {
    launcher.launchGui(injector);
    System.exit(0);
  }
}
From source file:com.adobe.ags.curly.test.ErrorBehaviorTest.java
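This test helper pre-acquires the single permit with acquireUninterruptibly(), then blocks in acquire() until Platform.runLater() releases the permit on the JavaFX application thread, effectively waiting for pending UI events to drain.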
private void sync() {
  Semaphore test = new Semaphore(1);
  test.acquireUninterruptibly();
  Platform.runLater(test::release);
  try {
    test.acquire();
  } catch (InterruptedException ex) {
    Logger.getLogger(ErrorBehaviorTest.class.getName()).log(Level.SEVERE, null, ex);
  }
}
From source file:mitm.common.tools.SendMail.java
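A fair Semaphore sized to the configured thread count caps the number of messages being sent concurrently: the sending loop blocks in acquireUninterruptibly() until one of the worker tasks releases a permit in its finally block.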
private void sendMultiThreaded(final MailTransport mailSender, final MimeMessage message,
    final Address[] recipients) throws InterruptedException {
  ExecutorService threadPool = Executors.newCachedThreadPool();
  final Semaphore semaphore = new Semaphore(threads, true);
  final long startTime = System.currentTimeMillis();

  for (int i = 1; i <= count; i++) {
    long threadStart = System.currentTimeMillis();
    semaphore.acquireUninterruptibly();
    threadPool.execute(new Runnable() {
      @Override
      public void run() {
        try {
          MimeMessage clone = MailUtils.cloneMessage(message);
          int sent = sentCount.incrementAndGet();
          if (uniqueFrom) {
            Address[] froms = clone.getFrom();
            if (froms != null && froms.length > 0) {
              clone.setFrom(new InternetAddress(sent + EmailAddressUtils.getEmailAddress(froms[0])));
            }
          }
          mailSender.sendMessage(clone, recipients);
          long timePassed = DateTimeUtils.millisecondsToSeconds(System.currentTimeMillis() - startTime);
          StrBuilder sb = new StrBuilder();
          sb.append("Message\t" + sent + "\tsent.");
          if (timePassed > 0) {
            float msgPerSec = (float) sent / timePassed;
            sb.append("\tmessages/second\t" + String.format("%.2f", msgPerSec));
          }
          logger.info(sb.toString());
        } catch (MessagingException e) {
          logger.error("Error sending message.", e);
        } finally {
          semaphore.release();
        }
      }
    });
    if (forceQuit.get()) {
      break;
    }
    if (throtllingSemaphore != null) {
      /* for throttling the sending of emails */
      throtllingSemaphore.acquire();
    } else {
      /* no throttling so use delay */
      long sleepTime = delay - (System.currentTimeMillis() - threadStart);
      if (sleepTime > 0) {
        Thread.sleep(sleepTime);
      }
    }
  }
  threadPool.shutdown();
  threadPool.awaitTermination(30, TimeUnit.SECONDS);
  waitForReceiveThreads();
  logger.info("Total sent: " + sentCount.intValue() + ". Total time: "
      + DateTimeUtils.millisecondsToSeconds(System.currentTimeMillis() - startTime) + " (sec.)");
}
From source file:org.apache.hadoop.hdfs.client.impl.TestBlockReaderFactory.java
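This regression test uses a zero-permit Semaphore as a handshake between the reader thread and the loop that delivers interrupts; the paired release()/acquireUninterruptibly() calls ensure the interrupt arrives while the reader is blocked inside readAll().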
/**
 * When an InterruptedException is sent to a thread calling
 * FileChannel#read, the FileChannel is immediately closed and the
 * thread gets an exception. This effectively means that we might have
 * someone asynchronously calling close() on the file descriptors we use
 * in BlockReaderLocal. So when unreferencing a ShortCircuitReplica in
 * ShortCircuitCache#unref, we should check if the FileChannel objects
 * are still open. If not, we should purge the replica to avoid giving
 * it out to any future readers.
 *
 * This is a regression test for HDFS-6227: Short circuit read failed
 * due to ClosedChannelException.
 *
 * Note that you may still get ClosedChannelException errors if two threads
 * are reading from the same replica and an InterruptedException is delivered
 * to one of them.
 */
@Test(timeout = 120000)
public void testPurgingClosedReplicas() throws Exception {
  BlockReaderTestUtil.enableBlockReaderFactoryTracing();
  final AtomicInteger replicasCreated = new AtomicInteger(0);
  final AtomicBoolean testFailed = new AtomicBoolean(false);
  DFSInputStream.tcpReadsDisabledForTesting = true;
  BlockReaderFactory.createShortCircuitReplicaInfoCallback = new ShortCircuitCache.ShortCircuitReplicaCreator() {
    @Override
    public ShortCircuitReplicaInfo createShortCircuitReplicaInfo() {
      replicasCreated.incrementAndGet();
      return null;
    }
  };
  TemporarySocketDirectory sockDir = new TemporarySocketDirectory();
  Configuration conf = createShortCircuitConf("testPurgingClosedReplicas", sockDir);
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
  cluster.waitActive();
  final DistributedFileSystem dfs = cluster.getFileSystem();
  final String TEST_FILE = "/test_file";
  final int TEST_FILE_LEN = 4095;
  final int SEED = 0xFADE0;
  final DistributedFileSystem fs = (DistributedFileSystem) FileSystem.get(cluster.getURI(0), conf);
  DFSTestUtil.createFile(fs, new Path(TEST_FILE), TEST_FILE_LEN, (short) 1, SEED);

  final Semaphore sem = new Semaphore(0);
  final List<LocatedBlock> locatedBlocks = cluster.getNameNode().getRpcServer()
      .getBlockLocations(TEST_FILE, 0, TEST_FILE_LEN).getLocatedBlocks();
  final LocatedBlock lblock = locatedBlocks.get(0); // first block
  final byte[] buf = new byte[TEST_FILE_LEN];
  Runnable readerRunnable = new Runnable() {
    @Override
    public void run() {
      try {
        while (true) {
          BlockReader blockReader = null;
          try {
            blockReader = BlockReaderTestUtil.getBlockReader(cluster.getFileSystem(), lblock, 0, TEST_FILE_LEN);
            sem.release();
            try {
              blockReader.readAll(buf, 0, TEST_FILE_LEN);
            } finally {
              sem.acquireUninterruptibly();
            }
          } catch (ClosedByInterruptException e) {
            LOG.info("got the expected ClosedByInterruptException", e);
            sem.release();
            break;
          } finally {
            if (blockReader != null)
              blockReader.close();
          }
          LOG.info("read another " + TEST_FILE_LEN + " bytes.");
        }
      } catch (Throwable t) {
        LOG.error("getBlockReader failure", t);
        testFailed.set(true);
        sem.release();
      }
    }
  };
  Thread thread = new Thread(readerRunnable);
  thread.start();

  // While the thread is reading, send it interrupts.
  // These should trigger a ClosedChannelException.
  while (thread.isAlive()) {
    sem.acquireUninterruptibly();
    thread.interrupt();
    sem.release();
  }
  Assert.assertFalse(testFailed.get());

  // We should be able to read from the file without
  // getting a ClosedChannelException.
  BlockReader blockReader = null;
  try {
    blockReader = BlockReaderTestUtil.getBlockReader(cluster.getFileSystem(), lblock, 0, TEST_FILE_LEN);
    blockReader.readFully(buf, 0, TEST_FILE_LEN);
  } finally {
    if (blockReader != null)
      blockReader.close();
  }
  byte expected[] = DFSTestUtil.calculateFileContentsFromSeed(SEED, TEST_FILE_LEN);
  Assert.assertTrue(Arrays.equals(buf, expected));

  // Another ShortCircuitReplica object should have been created.
  Assert.assertEquals(2, replicasCreated.get());

  dfs.close();
  cluster.shutdown();
  sockDir.close();
}
From source file:org.apache.hadoop.hdfs.server.blockmanagement.TestBlockReportRateLimiting.java
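Here acquireUninterruptibly() gates the fault injector: each incoming full block report blocks on fbrSem until the test releases a permit, so datanode reports are admitted one at a time during startup.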
@Test(timeout = 180000)
public void testRateLimitingDuringDataNodeStartup() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFS_NAMENODE_MAX_FULL_BLOCK_REPORT_LEASES, 1);
  conf.setLong(DFS_NAMENODE_FULL_BLOCK_REPORT_LEASE_LENGTH_MS, 20L * 60L * 1000L);

  final Semaphore fbrSem = new Semaphore(0);
  final HashSet<DatanodeID> expectedFbrDns = new HashSet<>();
  final HashSet<DatanodeID> fbrDns = new HashSet<>();
  final AtomicReference<String> failure = new AtomicReference<String>("");

  final BlockManagerFaultInjector injector = new BlockManagerFaultInjector() {
    private int numLeases = 0;

    @Override
    public void incomingBlockReportRpc(DatanodeID nodeID, BlockReportContext context) throws IOException {
      LOG.info("Incoming full block report from " + nodeID + ". Lease ID = 0x"
          + Long.toHexString(context.getLeaseId()));
      if (context.getLeaseId() == 0) {
        setFailure(failure, "Got unexpected rate-limiting-" + "bypassing full block report RPC from " + nodeID);
      }
      fbrSem.acquireUninterruptibly();
      synchronized (this) {
        fbrDns.add(nodeID);
        if (!expectedFbrDns.remove(nodeID)) {
          setFailure(failure, "Got unexpected full block report " + "RPC from " + nodeID
              + ". expectedFbrDns = " + Joiner.on(", ").join(expectedFbrDns));
        }
        LOG.info("Proceeding with full block report from " + nodeID + ". Lease ID = 0x"
            + Long.toHexString(context.getLeaseId()));
      }
    }

    @Override
    public void requestBlockReportLease(DatanodeDescriptor node, long leaseId) {
      if (leaseId == 0) {
        return;
      }
      synchronized (this) {
        numLeases++;
        expectedFbrDns.add(node);
        LOG.info("requestBlockReportLease(node=" + node + ", leaseId=0x" + Long.toHexString(leaseId) + "). "
            + "expectedFbrDns = " + Joiner.on(", ").join(expectedFbrDns));
        if (numLeases > 1) {
          setFailure(failure, "More than 1 lease was issued at once.");
        }
      }
    }

    @Override
    public void removeBlockReportLease(DatanodeDescriptor node, long leaseId) {
      LOG.info("removeBlockReportLease(node=" + node + ", leaseId=0x" + Long.toHexString(leaseId) + ")");
      synchronized (this) {
        numLeases--;
      }
    }
  };
  BlockManagerFaultInjector.instance = injector;

  final int NUM_DATANODES = 5;
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES).build();
  cluster.waitActive();
  for (int n = 1; n <= NUM_DATANODES; n++) {
    LOG.info("Waiting for " + n + " datanode(s) to report in.");
    fbrSem.release();
    Uninterruptibles.sleepUninterruptibly(20, TimeUnit.MILLISECONDS);
    final int currentN = n;
    GenericTestUtils.waitFor(new Supplier<Boolean>() {
      @Override
      public Boolean get() {
        synchronized (injector) {
          if (fbrDns.size() > currentN) {
            setFailure(failure, "Expected at most " + currentN
                + " datanodes to have sent a block report, but actually " + fbrDns.size() + " have.");
          }
          return (fbrDns.size() >= currentN);
        }
      }
    }, 25, 50000);
  }
  cluster.shutdown();
  Assert.assertEquals("", failure.get());
}
From source file:org.apache.hadoop.hdfs.TestBlockReaderFactory.java
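A second copy of the same HDFS-6227 regression test from a different package; it uses the identical zero-permit handshake between the reader thread and the interrupting loop.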
/**
 * When an InterruptedException is sent to a thread calling
 * FileChannel#read, the FileChannel is immediately closed and the
 * thread gets an exception. This effectively means that we might have
 * someone asynchronously calling close() on the file descriptors we use
 * in BlockReaderLocal. So when unreferencing a ShortCircuitReplica in
 * ShortCircuitCache#unref, we should check if the FileChannel objects
 * are still open. If not, we should purge the replica to avoid giving
 * it out to any future readers.
 *
 * This is a regression test for HDFS-6227: Short circuit read failed
 * due to ClosedChannelException.
 *
 * Note that you may still get ClosedChannelException errors if two threads
 * are reading from the same replica and an InterruptedException is delivered
 * to one of them.
 */
@Test(timeout = 120000)
public void testPurgingClosedReplicas() throws Exception {
  BlockReaderTestUtil.enableBlockReaderFactoryTracing();
  final AtomicInteger replicasCreated = new AtomicInteger(0);
  final AtomicBoolean testFailed = new AtomicBoolean(false);
  DFSInputStream.tcpReadsDisabledForTesting = true;
  BlockReaderFactory.createShortCircuitReplicaInfoCallback = new ShortCircuitCache.ShortCircuitReplicaCreator() {
    @Override
    public ShortCircuitReplicaInfo createShortCircuitReplicaInfo() {
      replicasCreated.incrementAndGet();
      return null;
    }
  };
  TemporarySocketDirectory sockDir = new TemporarySocketDirectory();
  Configuration conf = createShortCircuitConf("testPurgingClosedReplicas", sockDir);
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
  cluster.waitActive();
  final DistributedFileSystem dfs = cluster.getFileSystem();
  final String TEST_FILE = "/test_file";
  final int TEST_FILE_LEN = 4095;
  final int SEED = 0xFADE0;
  final DistributedFileSystem fs = (DistributedFileSystem) FileSystem.get(cluster.getURI(0), conf);
  DFSTestUtil.createFile(fs, new Path(TEST_FILE), TEST_FILE_LEN, (short) 1, SEED);

  final Semaphore sem = new Semaphore(0);
  final List<LocatedBlock> locatedBlocks = cluster.getNameNode().getRpcServer()
      .getBlockLocations(TEST_FILE, 0, TEST_FILE_LEN).getLocatedBlocks();
  final LocatedBlock lblock = locatedBlocks.get(0); // first block
  final byte[] buf = new byte[TEST_FILE_LEN];
  Runnable readerRunnable = new Runnable() {
    @Override
    public void run() {
      try {
        while (true) {
          BlockReader blockReader = null;
          try {
            blockReader = BlockReaderTestUtil.getBlockReader(cluster, lblock, 0, TEST_FILE_LEN);
            sem.release();
            try {
              blockReader.readAll(buf, 0, TEST_FILE_LEN);
            } finally {
              sem.acquireUninterruptibly();
            }
          } catch (ClosedByInterruptException e) {
            LOG.info("got the expected ClosedByInterruptException", e);
            sem.release();
            break;
          } finally {
            if (blockReader != null)
              blockReader.close();
          }
          LOG.info("read another " + TEST_FILE_LEN + " bytes.");
        }
      } catch (Throwable t) {
        LOG.error("getBlockReader failure", t);
        testFailed.set(true);
        sem.release();
      }
    }
  };
  Thread thread = new Thread(readerRunnable);
  thread.start();

  // While the thread is reading, send it interrupts.
  // These should trigger a ClosedChannelException.
  while (thread.isAlive()) {
    sem.acquireUninterruptibly();
    thread.interrupt();
    sem.release();
  }
  Assert.assertFalse(testFailed.get());

  // We should be able to read from the file without
  // getting a ClosedChannelException.
  BlockReader blockReader = null;
  try {
    blockReader = BlockReaderTestUtil.getBlockReader(cluster, lblock, 0, TEST_FILE_LEN);
    blockReader.readFully(buf, 0, TEST_FILE_LEN);
  } finally {
    if (blockReader != null)
      blockReader.close();
  }
  byte expected[] = DFSTestUtil.calculateFileContentsFromSeed(SEED, TEST_FILE_LEN);
  Assert.assertTrue(Arrays.equals(buf, expected));

  // Another ShortCircuitReplica object should have been created.
  Assert.assertEquals(2, replicasCreated.get());

  dfs.close();
  cluster.shutdown();
  sockDir.close();
}
From source file:org.commoncrawl.service.crawler.CrawlerEngine.java
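The segment loader blocks on a zero-permit Semaphore with acquireUninterruptibly() until a timer callback running on the server event loop updates the segment status (or re-activates the segment log) and releases the permit.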
/** internal loadWorkUnit routine **/
private CrawlSegmentStatus loadCrawlSegment(final CrawlSegment crawlSegment) {
  _activeLoadCount++;

  // mark the segment as crawling ...
  crawlSegment.setIsCrawling(true);

  final CrawlSegmentStatus status = new CrawlSegmentStatus();
  status.setListId(crawlSegment.getListId());
  status.setSegmentId(crawlSegment.getSegmentId());
  status.setLoadStatus(CrawlSegmentStatus.LoadStatus.LOADING);
  status.setCrawlStatus(CrawlSegmentStatus.CrawlStatus.UNKNOWN);
  status.setUrlCount(0);
  status.setUrlsComplete(0);
  status.setIsDirty(true);

  _statusMap.put(CrawlLog.makeSegmentLogId(crawlSegment.getListId(), crawlSegment.getSegmentId()), status);

  if (Environment.detailLogEnabled())
    LOG.info("loading crawl segment:" + crawlSegment.getSegmentId());

  if (!getServer().externallyManageCrawlSegments()) {
    // remove crawl segment log from crawl log data structure
    // (we need to do this to protect the data structure from corruption, since the underlying
    // worker thread walks the log and reconciles it against the segment data)
    final CrawlSegmentLog segmentLogObj = (getServer().enableCrawlLog())
        ? _crawlLog.removeSegmentLog(crawlSegment.getListId(), crawlSegment.getSegmentId())
        : null;

    if (segmentLogObj == null && getServer().enableCrawlLog()) {
      _activeLoadCount--;
      throw new RuntimeException("Expected Non-NULL CrawlSegmentLog for Segment:" + crawlSegment.getSegmentId());
    }

    getServer().getDefaultThreadPool()
        .execute(new ConcurrentTask<CrawlSegmentStatus>(getServer().getEventLoop(),
            new Callable<CrawlSegmentStatus>() {
              public CrawlSegmentStatus call() throws Exception {
                try {
                  LOG.info("### SYNC:Loading SegmentFPInfo for List:" + crawlSegment.getListId()
                      + " Segment:" + crawlSegment.getSegmentId());
                  // load work unit fingerprint detail ...
                  final CrawlSegmentFPMap urlFPMap = SegmentLoader.loadCrawlSegmentFPInfo(
                      crawlSegment.getListId(), crawlSegment.getSegmentId(),
                      CrawlerEngine.this.getServer().getHostName(),
                      new SegmentLoader.CancelOperationCallback() {
                        @Override
                        public boolean cancelOperation() {
                          return _shutdownFlag;
                        }
                      });

                  if (_shutdownFlag) {
                    LOG.info("### SYNC:EXITING LOAD OF List:" + crawlSegment.getListId()
                        + " Segment:" + crawlSegment.getSegmentId());
                    return new CrawlSegmentStatus();
                  }

                  if (getServer().enableCrawlLog()) {
                    LOG.info("### SYNC: Syncing Log to SegmentFPInfo for List:" + crawlSegment.getListId()
                        + " Segment:" + crawlSegment.getSegmentId());
                    // re-sync log to segment ...
                    segmentLogObj.syncToLog(urlFPMap);
                  }

                  LOG.info("### SYNC: Sync for List:" + crawlSegment.getListId() + " Segment:"
                      + crawlSegment.getSegmentId() + " Returned:" + urlFPMap._urlCount
                      + " Total URLS and " + urlFPMap._urlsComplete + " CompleteURLS");

                  if (!_shutdownFlag) {
                    // now activate the segment log ...
                    final Semaphore segActiveSemaphore = new Semaphore(0);

                    // check for completion here ...
                    if (urlFPMap._urlCount == urlFPMap._urlsComplete && !_shutdownFlag) {
                      LOG.info("### SYNC: For List:" + crawlSegment.getListId() + " Segment:"
                          + crawlSegment.getSegmentId() + " indicates Completed Segment.");

                      _server.getEventLoop().setTimer(new Timer(1, false, new Timer.Callback() {
                        public void timerFired(Timer timer) {
                          LOG.info("### SYNC: For List:" + crawlSegment.getListId() + " Segment:"
                              + crawlSegment.getSegmentId()
                              + " setting Status to CompletedCompleted Segment.");
                          if (!_shutdownFlag) {
                            // update segment status ...
                            status.setUrlCount(urlFPMap._urlCount);
                            status.setUrlsComplete(urlFPMap._urlCount);
                            // update crawl status
                            status.setCrawlStatus(CrawlSegmentStatus.CrawlStatus.CRAWL_COMPLETE);
                            status.setIsComplete(true);
                            // set dirty flag for segment
                            status.setIsDirty(true);
                          }
                          // and release semaphore ...
                          segActiveSemaphore.release();
                        }
                      }));
                    } else {
                      _server.getEventLoop().setTimer(new Timer(1, false, new Timer.Callback() {
                        public void timerFired(Timer timer) {
                          if (!_shutdownFlag) {
                            if (getServer().enableCrawlLog()) {
                              // back in primary thread context, so go ahead and SAFELY re-activate the segment log ...
                              activateSegmentLog(segmentLogObj);
                            }
                          }
                          // and release semaphore ...
                          segActiveSemaphore.release();
                        }
                      }));
                    }
                    // wait for segment activation ...
                    segActiveSemaphore.acquireUninterruptibly();
                  }

                  // now if complete return immediately
                  if (urlFPMap._urlCount != urlFPMap._urlsComplete && !_shutdownFlag) {
                    LOG.info("### LOADER Loading CrawlSegment Detail for Segment:" + crawlSegment.getSegmentId());
                    SegmentLoader.loadCrawlSegment(crawlSegment.getListId(), crawlSegment.getSegmentId(),
                        CrawlerEngine.this.getServer().getHostName(), urlFPMap, null,
                        createLoadProgressCallback(status), new SegmentLoader.CancelOperationCallback() {
                          @Override
                          public boolean cancelOperation() {
                            return _shutdownFlag;
                          }
                        });
                  }
                } catch (Exception e) {
                  LOG.error(StringUtils.stringifyException(e));
                  throw e;
                }
                return status;
              }
            }, createCompletionCallback(crawlSegment, status)));
  } else {
    getServer().loadExternalCrawlSegment(crawlSegment, createLoadProgressCallback(status),
        createCompletionCallback(crawlSegment, status), status);
  }
  return status;
}
From source file:org.commoncrawl.service.crawler.CrawlSegmentLog.java
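A one-permit Semaphore is drained up front with acquireUninterruptibly(); a second acquireUninterruptibly() then blocks the loader thread until the asynchronous history-server callback releases the permit.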
/** sync the incoming segment against the local crawl log and then send it up to the history server **/
public int syncToLog(CrawlSegmentFPMap segmentDetail) throws IOException {
  if (Environment.detailLogEnabled())
    LOG.info("### SYNC: List:" + _listId + " Segment:" + _segmentId + " Syncing Progress Log");

  int itemsProcessed = 0;

  // and construct a path to the local crawl segment directory ...
  File activeLogPath = buildActivePath(_rootDataDir, _listId, _segmentId);
  File checkpointLogPath = buildCheckpointPath(_rootDataDir, _listId, _segmentId);

  // check if it exists ...
  if (checkpointLogPath.exists()) {
    // log it ...
    if (Environment.detailLogEnabled())
      LOG.info("### SYNC: List:" + _listId + " Segment:" + _segmentId + " Checkpoint Log Found");
    // rename it as the active log ...
    checkpointLogPath.renameTo(activeLogPath);
  }

  if (activeLogPath.exists()) {
    // reconcile against active log (if it exists) ...
    _localLogItemCount = reconcileLogFile(FileSystem.getLocal(CrawlEnvironment.getHadoopConfig()),
        new Path(activeLogPath.getAbsolutePath()), _listId, _segmentId, segmentDetail, null);
    if (Environment.detailLogEnabled())
      LOG.info("### SYNC: List:" + _listId + " Segment:" + _segmentId
          + " Reconciled Local Log File with ProcessedItemCount:" + _localLogItemCount);
    itemsProcessed += _localLogItemCount;
  }

  FileSystem hdfs = CrawlEnvironment.getDefaultFileSystem();

  // first things first ... check to see if special completion log file exists in hdfs
  Path hdfsSegmentCompletionLogPath = new Path(CrawlEnvironment.getCrawlSegmentDataDirectory() + "/"
      + getListId() + "/" + getSegmentId() + "/"
      + CrawlEnvironment.buildCrawlSegmentCompletionLogFileName(getNodeName()));

  if (hdfs.exists(hdfsSegmentCompletionLogPath)) {
    if (Environment.detailLogEnabled())
      LOG.info("### SYNC: List:" + _listId + " Segment:" + _segmentId
          + " Completion File Found. Marking Segment Complete");
    // if the file exists then this segment has been crawled and uploaded already ...
    // if active log file exists ... delete it ...
    if (activeLogPath.exists())
      activeLogPath.delete();
    // reset local log item count ...
    _localLogItemCount = 0;
    itemsProcessed = -1;
    // remove all hosts from segment
    segmentDetail._urlsComplete = segmentDetail._urlCount;
  } else {
    if (segmentDetail != null) {
      if (Environment.detailLogEnabled())
        LOG.info("### SYNC: Building BulkItem History Query for List:" + _listId + " Segment:" + _segmentId);
      BulkItemHistoryQuery query = buildHistoryQueryBufferFromMap(segmentDetail);

      if (query != null) {
        // create blocking semaphore ...
        final Semaphore semaphore = new Semaphore(1);
        semaphore.acquireUninterruptibly();
        if (Environment.detailLogEnabled())
          LOG.info("### SYNC: Dispatching query to history server");
        // create an outer response object we can pass aysnc response to ...
        final BulkItemHistoryQueryResponse outerResponse = new BulkItemHistoryQueryResponse();

        CrawlerServer.getServer().getHistoryServiceStub().bulkItemQuery(query,
            new Callback<BulkItemHistoryQuery, BulkItemHistoryQueryResponse>() {
              @Override
              public void requestComplete(
                  final AsyncRequest<BulkItemHistoryQuery, BulkItemHistoryQueryResponse> request) {
                // response returns in async thread context ...
                if (request.getStatus() == Status.Success) {
                  if (Environment.detailLogEnabled())
                    LOG.info("###SYNC: bulk Query to history server succeeded. setting out resposne");
                  ImmutableBuffer buffer = request.getOutput().getResponseList();
                  outerResponse.setResponseList(new Buffer(buffer.getReadOnlyBytes(), 0, buffer.getCount()));
                } else {
                  LOG.error("###SYNC: bulk Query to history server failed.");
                }
                // release semaphore
                semaphore.release();
              }
            });
        LOG.info("###SYNC: Loader thread blocked waiting for bulk query response");
        semaphore.acquireUninterruptibly();
        LOG.info("###SYNC: Loader thread received response from history server");

        if (outerResponse.getResponseList().getCount() == 0) {
          LOG.error("###SYNC: History Server Bulk Query Returned NULL!!! for List:" + _listId
              + " Segment:" + _segmentId);
        } else {
          // ok time to process the response and integrate the results into the fp list
          updateFPMapFromBulkQueryResponse(segmentDetail, outerResponse);
        }
      } else {
        if (Environment.detailLogEnabled())
          LOG.warn("### SYNC: No fingerprints found when processing segment detail for List:" + _listId
              + " Segment:" + _segmentId);
        segmentDetail._urlsComplete = segmentDetail._urlCount;
      }
    }
    /*
    // and now walk hdfs looking for any checkpointed logs ...
    // scan based on checkpoint filename ...
    FileStatus[] remoteCheckpointFiles = hdfs.globStatus(new Path(CrawlEnvironment.getCrawlSegmentDataDirectory() + "/"
        + getListId() + "/" + getSegmentId() + "/"
        + CrawlEnvironment.buildCrawlSegmentLogCheckpointWildcardString(getNodeName())));

    if (remoteCheckpointFiles != null) {
      LOG.info("### SYNC: List:" + _listId + " Segment:" + _segmentId + " Found Remote Checkpoint Files");

      // create a temp file to hold the reconciled log ...
      File consolidatedLogFile = null;

      if (remoteCheckpointFiles.length > 1) {
        // create temp log file ...
        consolidatedLogFile = File.createTempFile("SegmentLog", Long.toString(System.currentTimeMillis()));
        // write out header ...
        CrawlSegmentLog.writeHeader(consolidatedLogFile, 0);
      }
      // walk the files
      for (FileStatus checkpointFilePath : remoteCheckpointFiles) {
        // and reconcile them against segment ...
        itemsProcessed += reconcileLogFile(hdfs, checkpointFilePath.getPath(), getListId(), getSegmentId(),
            segmentDetail, consolidatedLogFile);
        LOG.info("### SYNC: List:" + _listId + " Segment:" + _segmentId + " Processed Checkpoint File:"
            + checkpointFilePath.getPath() + " Items Processed:" + itemsProcessed);
      }

      // finally ... if consolidatedLogFile is not null
      if (consolidatedLogFile != null) {
        // build a new hdfs file name ...
        Path consolidatedHDFSPath = new Path(CrawlEnvironment.getCrawlSegmentDataDirectory() + "/" + getListId()
            + "/" + getSegmentId() + "/"
            + CrawlEnvironment.buildCrawlSegmentLogCheckpointFileName(getNodeName(), System.currentTimeMillis()));
        LOG.info("### SYNC: List:" + _listId + " Segment:" + _segmentId + " Writing Consolidated Log File:"
            + consolidatedHDFSPath + " to HDFS");
        // and copy local file to log ...
        hdfs.copyFromLocalFile(new Path(consolidatedLogFile.getAbsolutePath()), consolidatedHDFSPath);
        // and delete all previous log file entries ...
        for (FileStatus oldCheckPointFile : remoteCheckpointFiles) {
          hdfs.delete(oldCheckPointFile.getPath());
        }
        consolidatedLogFile.delete();
      }
    }
    */
  }

  if (segmentDetail != null) {
    _remainingURLS += (segmentDetail._urlCount - segmentDetail._urlsComplete);
    // mark url count as valid now ...
    _urlCountValid = true;

    // now if remaining url count is zero ... then mark the segment as complete ...
    if (_remainingURLS == 0 && _localLogItemCount == 0) {
      _segmentComplete = true;
    }
  }

  if (Environment.detailLogEnabled())
    LOG.info("### SYNC: List:" + _listId + " Segment:" + _segmentId + " Done Syncing Progress Log TotalURLS:"
        + segmentDetail._urlCount + " RemainingURLS:" + _remainingURLS + " LocalLogItemCount:" + _localLogItemCount);

  return itemsProcessed;
}
From source file:org.commoncrawl.service.listcrawler.CacheManager.java
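acquireUninterruptibly() appears twice here: first to grab all LOG_ACCESS_SEMAPHORE_COUNT permits of the log-access semaphore for exclusive access to the local transaction log, then to wait on a zero-permit semaphore until an async timer event on the event loop finishes updating in-memory state.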
private final void flushLocalLog(final long bytesToRemove, final int itemsToRemove,
    final List<FingerprintAndOffsetTuple> flushedTupleList, final ArrayList<IndexDataFileTriple> tempFileTriples) {

  LOG.info("Acquiring Log Access Semaphores");
  // first boost this thread's priority ...
  int originalThreadPriority = Thread.currentThread().getPriority();
  Thread.currentThread().setPriority(Thread.MAX_PRIORITY);
  // next acquire all permits to the local access log ... block until we get there ...
  getLocalLogAccessSemaphore().acquireUninterruptibly(LOG_ACCESS_SEMAPHORE_COUNT);
  // now that we have all the semaphores we need, reduce the thread's priority to normal
  Thread.currentThread().setPriority(originalThreadPriority);
  LOG.info("Acquired ALL Log Access Semaphores");

  long timeStart = System.currentTimeMillis();

  // now we have exclusive access to the local transaction log ...
  File activeLogFilePath = getActiveLogFilePath();
  File checkpointLogFilePath = getCheckpointLogFilePath();
  try {
    // delete checkpoint file if it existed ...
    checkpointLogFilePath.delete();
    // now rename activelog to checkpoint path
    activeLogFilePath.renameTo(checkpointLogFilePath);

    long logFileConsolidationStartTime = System.currentTimeMillis();
    // now trap for exceptions in case something fails
    try {
      // fix up the header ...
      _header._fileSize -= bytesToRemove;
      _header._itemCount -= itemsToRemove;
      // open a old file and new file
      RandomAccessFile newFile = new RandomAccessFile(activeLogFilePath, "rw");
      RandomAccessFile oldFile = new RandomAccessFile(checkpointLogFilePath, "r");

      LOG.info("Opened new and old files. New Header FileSize is:" + _header._fileSize + " ItemCount:"
          + _header._itemCount);
      try {
        // write out header ...
        long bytesRemainingInLogFile = _header._fileSize;

        LOG.info("Writing Header to New File. Bytes Remaining for Data are:" + bytesRemainingInLogFile);
        // write header to new file ...
        _header.writeHeader(newFile);
        // decrement bytes available ...
        bytesRemainingInLogFile -= LocalLogFileHeader.SIZE;

        if (bytesRemainingInLogFile != 0) {
          byte transferBuffer[] = new byte[(1 << 20) * 16];

          LOG.info("Seeking old file past flushed data (pos:" + LocalLogFileHeader.SIZE + bytesToRemove + ")");
          // seek past old data ...
          oldFile.seek(LocalLogFileHeader.SIZE + bytesToRemove);
          // and copy across remaining data
          while (bytesRemainingInLogFile != 0) {
            int bytesToReadWriteThisIteration = Math.min((int) bytesRemainingInLogFile, transferBuffer.length);
            oldFile.read(transferBuffer, 0, bytesToReadWriteThisIteration);
            newFile.write(transferBuffer, 0, bytesToReadWriteThisIteration);
            LOG.info("Copied " + bytesToReadWriteThisIteration + " from Old to New");
            bytesRemainingInLogFile -= bytesToReadWriteThisIteration;
          }
        }
      } finally {
        if (newFile != null) {
          newFile.close();
        }
        if (oldFile != null) {
          oldFile.close();
        }
      }
      // if we reached here then checkpoint was successfull ...
      LOG.info("Checkpoint - Log Consolidation Successfull! TOOK:"
          + (System.currentTimeMillis() - logFileConsolidationStartTime));

      LOG.info("Loading Index Files");
      for (IndexDataFileTriple triple : tempFileTriples) {
        LOG.info("Loading Index File:" + triple._localIndexFilePath);
        final HDFSFileIndex fileIndex = new HDFSFileIndex(_remoteFileSystem, triple._localIndexFilePath,
            triple._dataFilePath);
        LOG.info("Loaded Index File");
        // update hdfs index list ...
        synchronized (CacheManager.this) {
          LOG.info("Adding HDFS Index to list");
          _hdfsIndexList.addElement(fileIndex);
        }
      }

      // create a semaphore to wait on
      final Semaphore semaphore = new Semaphore(0);

      LOG.info("Scheduling Async Event");
      // now we need to schedule an async call to main thread to update data structures safely ...
      _eventLoop.setTimer(new Timer(0, false, new Timer.Callback() {
        @Override
        public void timerFired(Timer timer) {
          LOG.info("Cleaning Map");
          synchronized (CacheManager.this) {
            // walk tuples
            for (FingerprintAndOffsetTuple tuple : flushedTupleList) {
              // TODO: HACK!
              // remove from collection ...
              _fingerprintToLocalLogPos.removeAll(tuple._fingerprint);
            }
          }
          LOG.info("Increment Offset Info");
          // finally increment locallog offset by bytes removed ...
          _localLogStartOffset += bytesToRemove;
          LOG.info("Releasing Wait Semaphore");
          // release wait sempahore
          semaphore.release();
        }
      }));

      LOG.info("Waiting for Async Event to Complete");
      // wait for async operation to complete ...
      semaphore.acquireUninterruptibly();
      LOG.info("Async Event to Completed");
    } catch (IOException e) {
      LOG.error("Checkpoint Failed with Exception:" + CCStringUtils.stringifyException(e));
      // delete new file ...
      activeLogFilePath.delete();
      // and rename checkpoint file to active file ...
      checkpointLogFilePath.renameTo(activeLogFilePath);
    }
  } finally {
    LOG.info("Releasing ALL Log Access Semaphores. HELD FOR:" + (System.currentTimeMillis() - timeStart));
    getLocalLogAccessSemaphore().release(LOG_ACCESS_SEMAPHORE_COUNT);
  }
}