List of usage examples for java.util.stream.StreamSupport.stream
public static <T> Stream<T> stream(Spliterator<T> spliterator, boolean parallel)
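The boolean flag selects sequential (false) or parallel (true) execution. A minimal, self-contained sketch of the call:

import java.util.List;
import java.util.Spliterator;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

public class StreamSupportExample {
  public static void main(String[] args) {
    Spliterator<String> split = List.of("a", "b", "c").spliterator();
    // false = sequential; true would request a parallel stream.
    Stream<String> stream = StreamSupport.stream(split, false);
    stream.forEach(System.out::println);
  }
}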
From source file:edu.jhu.hlt.concrete.ingesters.annotatednyt.AnnotatedNYTIngesterRunner.java
/**
 * @param args
 */
public static void main(String... args) {
  Thread.setDefaultUncaughtExceptionHandler(new LoggedUncaughtExceptionHandler());
  AnnotatedNYTIngesterRunner run = new AnnotatedNYTIngesterRunner();
  JCommander jc = new JCommander(run, args);
  jc.setProgramName(AnnotatedNYTIngesterRunner.class.getSimpleName());
  if (run.delegate.help) {
    jc.usage();
  }

  try {
    Path outpath = Paths.get(run.delegate.outputPath);
    IngesterParameterDelegate.prepare(outpath);
    NYTCorpusDocumentParser parser = new NYTCorpusDocumentParser();

    for (String pstr : run.delegate.paths) {
      LOGGER.debug("Running on file: {}", pstr);
      Path p = Paths.get(pstr);
      new ExistingNonDirectoryFile(p);
      int nPaths = p.getNameCount();
      Path year = p.getName(nPaths - 2);
      Path outWithExt = outpath.resolve(year.toString() + p.getFileName());
      if (Files.exists(outWithExt)) {
        if (!run.delegate.overwrite) {
          LOGGER.info("File: {} exists and overwrite disabled. Not running.", outWithExt.toString());
          continue;
        } else {
          Files.delete(outWithExt);
        }
      }

      try (InputStream is = Files.newInputStream(p);
          BufferedInputStream bin = new BufferedInputStream(is);
          TarGzArchiveEntryByteIterator iter = new TarGzArchiveEntryByteIterator(bin);
          OutputStream os = Files.newOutputStream(outWithExt);
          GzipCompressorOutputStream gout = new GzipCompressorOutputStream(os);
          TarArchiver arch = new TarArchiver(gout)) {
        // Adapt the one-shot Iterator to an Iterable so it can feed a Stream.
        Iterable<byte[]> able = () -> iter;
        StreamSupport.stream(able.spliterator(), false)
            .map(ba -> parser.fromByteArray(ba, false))
            .map(doc -> new AnnotatedNYTDocument(doc))
            .map(and -> new CommunicationizableAnnotatedNYTDocument(and).toCommunication())
            .forEach(comm -> {
              try {
                arch.addEntry(new ArchivableCommunication(comm));
              } catch (IOException e) {
                LOGGER.error("Caught exception processing file: " + pstr, e);
              }
            });
      }
    }
  } catch (NotFileException | IOException e) {
    LOGGER.error("Caught exception processing.", e);
  }
}
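The notable StreamSupport idiom above is adapting the one-shot tar.gz entry iterator into an Iterable with a lambda before streaming it. A minimal standalone sketch of that idiom (a plain List iterator stands in for TarGzArchiveEntryByteIterator):

import java.util.Iterator;
import java.util.List;
import java.util.stream.StreamSupport;

public class IteratorToStreamSketch {
  public static void main(String[] args) {
    Iterator<byte[]> iter = List.of(new byte[] { 1 }, new byte[] { 2 }).iterator();
    // The lambda implements Iterable's single method, iterator(),
    // returning the same underlying iterator on every call.
    Iterable<byte[]> able = () -> iter;
    long count = StreamSupport.stream(able.spliterator(), false).count();
    System.out.println(count); // 2
  }
}

Note that such an Iterable is single-use: every call to iterator() hands back the same, possibly exhausted, iterator.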
From source file:com.ikanow.aleph2.example.external_harvester.services.ExternalProcessLaunchService.java
public static void main(String[] args) throws InstantiationException, IllegalAccessException,
    ClassNotFoundException, JsonProcessingException, IOException, InterruptedException, ExecutionException {
  final ObjectMapper mapper = BeanTemplateUtils.configureMapper(Optional.empty());

  // Get the context (unused here)
  final IHarvestContext context = ContextUtils.getHarvestContext(args[0]);
  final DataBucketBean bucket = context.getBucket().get();
  _logger.info("Launched context, eg bucket status = : "
      + BeanTemplateUtils.toJson(context.getBucketStatus(Optional.empty()).get()));
  _logger.info("Retrieved bucket from CON: " + BeanTemplateUtils.toJson(bucket));
  // Get the bucket (unused here)
  _logger.info("Retrieved arg from CLI: " + args[1]);

  // Check that joins the cluster if I request the data bucket store
  //context.getService(IManagementDbService.class, Optional.of("core_management_db")).get().getDataBucketStore();
  //(But not if it's in read only mode)
  final IManagementCrudService<DataBucketBean> bucket_service = context.getServiceContext()
      .getCoreManagementDbService().readOnlyVersion().getDataBucketStore();
  _logger.info("Getting Management DB and reading number of buckets = "
      + bucket_service.countObjects().get().intValue());

  // Demonstration of accessing (read only) library state information:
  final Tuple2<SharedLibraryBean, Optional<GlobalConfigBean>> lib_config =
      ExternalProcessHarvestTechnology.getConfig(context);
  _logger.info("Retrieved library configuration: "
      + lib_config._2().map(g -> BeanTemplateUtils.toJson(g).toString()).orElse("(no config)"));

  // 1) Preferred method of getting per library state:
  final ICrudService<ProcessInfoBean> pid_crud = context
      .getGlobalHarvestTechnologyObjectStore(ProcessInfoBean.class, ProcessInfoBean.PID_COLLECTION_NAME);
  // 2) Lower level way:
  //final IManagementDbService core_db = context.getServiceContext().getCoreManagementDbService();
  //final ICrudService<ProcessInfoBean> pid_crud = core_db.getPerLibraryState(ProcessInfoBean.class, lib_config._1(), ProcessInfoBean.PID_COLLECTION_NAME);
  // 3) Alternatively (this construct is how you get per bucket state also):
  //final ICrudService<ProcessInfoBean> pid_crud = context.getBucketObjectStore(ProcessInfoBean.class, Optional.empty(), ProcessInfoBean.PID_COLLECTION_NAME, Optional.of(AssetStateDirectoryBean.StateDirectoryType.library));

  lib_config._2().ifPresent(gc -> {
    if (gc.store_pids_in_db())
      pid_crud.getObjectsBySpec(CrudUtils.allOf(ProcessInfoBean.class)
          .when(ProcessInfoBean::bucket_name, bucket.full_name())).thenAccept(cursor -> {
            String pids = StreamSupport.stream(cursor.spliterator(), false).map(c -> c._id())
                .collect(Collectors.joining(","));
            _logger.info("Pids/hostnames for this bucket: " + pids);
          }).exceptionally(err -> {
            _logger.error("Failed to get bucket pids", err);
            return null;
          });
  });

  // Just run for 10 minutes as an experiment
  for (int i = 0; i < 60; ++i) {
    // Example of promoting data to next stage
    if ((MasterEnrichmentType.streaming == bucket.master_enrichment_type())
        || (MasterEnrichmentType.streaming_and_batch == bucket.master_enrichment_type())) {
      // Send an object to kafka
      final JsonNode json = mapper.createObjectNode().put("@timestamp", new Date().getTime())
          .put("test_str", "test" + i).put("test_int", i);
      _logger.info("Sending object to kafka: " + json);
      context.sendObjectToStreamingPipeline(Optional.empty(), Either.left(json));
    }
    _logger.info("(sleeping: " + i + ")");
    try {
      Thread.sleep(10L * 1000L);
    } catch (Exception e) {
    }
  }
}
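The StreamSupport usage here streams a CRUD cursor (which is an Iterable) and joins the document ids with Collectors.joining. A minimal sketch of that step in isolation, with a plain List standing in for the cursor:

import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

public class JoinIdsSketch {
  public static void main(String[] args) {
    // Any Iterable works in place of the CRUD cursor used above.
    Iterable<String> cursor = List.of("pid-1", "pid-2", "pid-3");
    String pids = StreamSupport.stream(cursor.spliterator(), false)
        .collect(Collectors.joining(","));
    System.out.println(pids); // pid-1,pid-2,pid-3
  }
}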
From source file:com.act.lcms.v2.MZCollisionCounter.java
public static void main(String[] args) throws Exception {
  CLIUtil cliUtil = new CLIUtil(MassChargeCalculator.class, HELP_MESSAGE, OPTION_BUILDERS);
  CommandLine cl = cliUtil.parseCommandLine(args);

  File inputFile = new File(cl.getOptionValue(OPTION_INPUT_INCHI_LIST));
  if (!inputFile.exists()) {
    cliUtil.failWithMessage("Input file does not exist at %s", inputFile.getAbsolutePath());
  }

  List<MassChargeCalculator.MZSource> sources = new ArrayList<>();
  try (BufferedReader reader = new BufferedReader(new FileReader(inputFile))) {
    String line;
    while ((line = reader.readLine()) != null) {
      line = line.trim();
      sources.add(new MassChargeCalculator.MZSource(line));
      if (sources.size() % 1000 == 0) {
        LOGGER.info("Loaded %d sources from input file", sources.size());
      }
    }
  }

  Set<String> considerIons = Collections.emptySet();
  if (cl.hasOption(OPTION_ONLY_CONSIDER_IONS)) {
    List<String> ions = Arrays.asList(cl.getOptionValues(OPTION_ONLY_CONSIDER_IONS));
    LOGGER.info("Only considering ions for m/z calculation: %s", StringUtils.join(ions, ", "));
    considerIons = new HashSet<>(ions);
  }

  TSVWriter<String, Long> tsvWriter = new TSVWriter<>(Arrays.asList("collisions", "count"));
  tsvWriter.open(new File(cl.getOptionValue(OPTION_OUTPUT_FILE)));
  try {
    LOGGER.info("Loaded %d sources in total from input file", sources.size());

    MassChargeCalculator.MassChargeMap mzMap = MassChargeCalculator.makeMassChargeMap(sources, considerIons);

    if (!cl.hasOption(OPTION_COUNT_WINDOW_INTERSECTIONS)) {
      // Do an exact analysis of the m/z collisions if windowing is not specified.
      LOGGER.info("Computing precise collision histogram.");
      Iterable<Double> mzs = mzMap.ionMZIter();
      Map<Integer, Long> collisionHistogram = histogram(
          StreamSupport.stream(mzs.spliterator(), false).map(mz -> { // See comment about Iterable below.
            try {
              return mzMap.ionMZToMZSources(mz).size();
            } catch (NoSuchElementException e) {
              LOGGER.error("Caught no such element exception for mz %f: %s", mz, e.getMessage());
              throw e;
            }
          }));
      List<Integer> sortedCollisions = new ArrayList<>(collisionHistogram.keySet());
      Collections.sort(sortedCollisions);
      for (Integer collision : sortedCollisions) {
        tsvWriter.append(new HashMap<String, Long>() {
          {
            put("collisions", collision.longValue());
            put("count", collisionHistogram.get(collision));
          }
        });
      }
    } else {
      /* After some deliberation (thanks Gil!), the windowed variant of this calculation counts the number of
       * structures whose 0.01 Da m/z windows (for some set of ions) overlap with each other.
       *
       * For example, let's assume we have five total input structures, and are only searching for one ion.  Let's
       * also assume that three of those structures have m/z A and the remaining two have m/z B.  The windows might
       * look like this in the m/z domain:
       *   |----A----|
       *        |----B----|
       * Because A represents three structures and overlaps with B, which represents two, we assign A a count of 5--
       * this is the number of structures we believe could fall into the range of A given our current peak calling
       * approach.  Similarly, B is assigned a count of 5, as the possibility for collision/confusion is symmetric.
       *
       * Note that this is an over-approximation of collisions, as we could more precisely only consider intersections
       * when the exact m/z of B falls within the window around A and vice versa.  However, because we have observed
       * cases where the MS sensor doesn't report structures at exactly the m/z we predict, we employ this weaker
       * definition of intersection to give a slightly pessimistic view of what confusions might be possible. */
      // Compute windows for every m/z.  We don't care about the original mz values since we just want the count.
      List<Double> mzs = mzMap.ionMZsSorted();

      final Double windowHalfWidth;
      if (cl.hasOption(OPTION_WINDOW_HALFWIDTH)) {
        // Don't use get with default for this option, as we want the exact FP value of the default tolerance.
        windowHalfWidth = Double.valueOf(cl.getOptionValue(OPTION_WINDOW_HALFWIDTH));
      } else {
        windowHalfWidth = DEFAULT_WINDOW_TOLERANCE;
      }

      /* Window = (lower bound, upper bound), counter of represented m/z's that collide with this window, and number
       * of representative structures (which will be used in counting collisions). */
      LinkedList<CollisionWindow> allWindows = new LinkedList<CollisionWindow>() {
        {
          for (Double mz : mzs) {
            // CPU for memory trade-off: don't re-compute the window bounds over and over and over and over and over.
            try {
              add(new CollisionWindow(mz, windowHalfWidth, mzMap.ionMZToMZSources(mz).size()));
            } catch (NoSuchElementException e) {
              LOGGER.error("Caught no such element exception for mz %f: %s", mz, e.getMessage());
              throw e;
            }
          }
        }
      };

      // Sweep line time!  The window ranges are the interesting points.  We just accumulate overlap counts as we go.
      LinkedList<CollisionWindow> workingSet = new LinkedList<>();
      List<CollisionWindow> finished = new LinkedList<>();

      while (allWindows.size() > 0) {
        CollisionWindow thisWindow = allWindows.pop();
        // Remove any windows from the working set that don't overlap with the next window.
        while (workingSet.size() > 0 && workingSet.peekFirst().getMaxMZ() < thisWindow.getMinMZ()) {
          finished.add(workingSet.pop());
        }

        for (CollisionWindow w : workingSet) {
          /* Add the size of the new overlapping window's structure count to each of the windows in the working set,
           * which represents the number of possible confused structures that fall within the overlapping region.
           * We exclude the window itself as it should already have counted the colliding structures it represents. */
          w.getAccumulator().add(thisWindow.getStructureCount());
          /* Reciprocally, add the structure counts of all windows with which the current window overlaps to it. */
          thisWindow.getAccumulator().add(w.getStructureCount());
        }

        // Now that accumulation is complete, we can safely add the current window.
        workingSet.add(thisWindow);
      }

      // All the interesting events are done, so drop the remaining windows into the finished set.
      finished.addAll(workingSet);

      Map<Long, Long> collisionHistogram = histogram(
          finished.stream().map(w -> w.getAccumulator().longValue()));
      List<Long> sortedCollisions = new ArrayList<>(collisionHistogram.keySet());
      Collections.sort(sortedCollisions);
      for (Long collision : sortedCollisions) {
        tsvWriter.append(new HashMap<String, Long>() {
          {
            put("collisions", collision);
            put("count", collisionHistogram.get(collision));
          }
        });
      }
    }
  } finally {
    if (tsvWriter != null) {
      tsvWriter.close();
    }
  }
}
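The histogram helper this method calls is not part of the excerpt; a plausible minimal sketch, assuming it simply counts occurrences of each streamed value (name and signature inferred from the call sites above):

import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;

// Hypothetical helper: counts how often each value occurs in the stream.
public static <T> Map<T, Long> histogram(Stream<T> values) {
  return values.collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
}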
From source file:Main.java
public static <T> Stream<T> streamOf(Iterable<T> it) {
  return StreamSupport.stream(it.spliterator(), false);
}
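A quick usage sketch, assuming the streamOf helper above is in scope:

import java.util.List;
import java.util.stream.Stream;

Stream<String> s = streamOf(List.of("a", "b", "c"));
System.out.println(s.count()); // 3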
From source file:Main.java
/**
 * Converts an iterable into a stream.
 * By default the stream cannot be parallelized.
 * @param iterable iterable to convert into a stream
 * @return stream
 */
public static <T> Stream<T> toStream(Iterable<T> iterable) {
  return StreamSupport.stream(iterable.spliterator(), false);
}
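Passing true as the second argument would request a parallel stream instead. A hypothetical variant (toParallelStream is not part of the source above):

// Variant sketch: request a parallel stream. How well it actually splits
// depends on the characteristics of the Iterable's spliterator.
public static <T> Stream<T> toParallelStream(Iterable<T> iterable) {
  return StreamSupport.stream(iterable.spliterator(), true);
}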
From source file:Main.java
public static <T> Stream<T> stream(Iterator<T> iterator) {
  Iterable<T> iterable = () -> iterator;
  return StreamSupport.stream(iterable.spliterator(), false);
}
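Caveat worth knowing: the lambda-built Iterable always returns the same iterator, so it, and any stream built from it, is single-use. A usage sketch, assuming the stream helper above is in scope:

import java.util.List;
import java.util.stream.Stream;

Stream<Integer> s = stream(List.of(1, 2, 3).iterator());
System.out.println(s.mapToInt(Integer::intValue).sum()); // 6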
From source file:Main.java
/**
 * Creates a parallel {@code Stream} using a given {@code Iterator}
 * as the source of elements, with no initial size estimate.
 *
 * @return a parallel {@code Stream} over the remaining items in the iterator
 * @see Spliterators#spliteratorUnknownSize(Iterator, int)
 */
public static <T> Stream<T> parallelStream(Iterator<T> iterator) {
  return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, 0), true);
}
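The 0 passed as the second argument to Spliterators.spliteratorUnknownSize reports no spliterator characteristics. A hypothetical variant that declares the elements ordered and non-null (only valid if the iterator actually honors those contracts; orderedParallelStream is not part of the source above):

import java.util.Iterator;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

// Variant sketch: advertising ORDERED | NONNULL lets downstream stream
// operations rely on encounter order and skip null checks.
public static <T> Stream<T> orderedParallelStream(Iterator<T> iterator) {
  return StreamSupport.stream(
      Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED | Spliterator.NONNULL), true);
}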
From source file:Main.java
/**
 * Creates a sequential {@code Stream} using a given {@code Iterator}
 * as the source of elements, with no initial size estimate.
 *
 * @return a sequential {@code Stream} over the remaining items in the iterator
 * @see Spliterators#spliteratorUnknownSize(Iterator, int)
 */
public static <T> Stream<T> stream(Iterator<T> iterator) {
  return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, 0), false);
}
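A usage sketch, assuming the stream helper above is in scope; per the javadoc, only the items still remaining in the iterator reach the stream:

import java.util.Iterator;
import java.util.List;

Iterator<String> it = List.of("skip", "x", "y").iterator();
it.next(); // consume "skip" before wrapping
System.out.println(stream(it).count()); // 2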
From source file:Main.java
static void appendProps(Map<String, Object> properties, String key, Iterable<?> valuesToAppend) {
  properties.putIfAbsent(key, new LinkedHashSet<String>());
  StreamSupport.stream(valuesToAppend.spliterator(), false)
      .forEach(v -> ((Collection<String>) properties.get(key)).add(v.toString()));
}
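A usage sketch, assuming appendProps above is in scope; because the values land in a LinkedHashSet, duplicates are dropped and insertion order is kept:

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

Map<String, Object> props = new LinkedHashMap<>();
appendProps(props, "tags", List.of("a", "b"));
appendProps(props, "tags", List.of("b", "c"));
System.out.println(props); // {tags=[a, b, c]}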
From source file:org.codice.alliance.libs.mpegts.TSStream.java
/**
 * Create a stream of PESPackets from a byte source.
 *
 * @param byteSource must be non-null
 * @return stream of PESPackets
 * @throws IOException
 */
public static Stream<PESPacket> from(ByteSource byteSource) throws IOException {
  notNull(byteSource, "byteSource must be non-null");
  return StreamSupport.stream(Spliterators.spliteratorUnknownSize(new PESPacketIterator(byteSource),
      Spliterator.ORDERED | Spliterator.NONNULL), false);
}
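The same wrap-an-iterator pattern generalizes to any packet- or chunk-producing source. A self-contained sketch under illustrative names (ChunkIterator and chunks are stand-ins, not part of the Alliance codebase):

import java.util.Arrays;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

// Illustrative stand-in for PESPacketIterator: yields fixed-size chunks of a byte array.
class ChunkIterator implements Iterator<byte[]> {
  private final byte[] data;
  private final int chunkSize;
  private int pos = 0;

  ChunkIterator(byte[] data, int chunkSize) {
    this.data = data;
    this.chunkSize = chunkSize;
  }

  @Override
  public boolean hasNext() {
    return pos < data.length;
  }

  @Override
  public byte[] next() {
    if (!hasNext()) {
      throw new NoSuchElementException();
    }
    int end = Math.min(pos + chunkSize, data.length);
    byte[] chunk = Arrays.copyOfRange(data, pos, end);
    pos = end;
    return chunk;
  }
}

// Wrap the iterator exactly as TSStream.from does: ordered, non-null, unknown size.
static Stream<byte[]> chunks(byte[] data, int chunkSize) {
  return StreamSupport.stream(Spliterators.spliteratorUnknownSize(
      new ChunkIterator(data, chunkSize), Spliterator.ORDERED | Spliterator.NONNULL), false);
}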