Usage examples for java.util.BitSet.clear

public void clear(int bitIndex)
Sets the bit specified by the index to false.
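Before the project examples, a minimal self-contained sketch of the method's behavior. Note that a BitSet grows on demand, and clearing an index beyond the current logical size is simply a no-op (only a negative index throws):

import java.util.BitSet;

public class BitSetClearDemo {
    public static void main(String[] args) {
        BitSet bits = new BitSet();
        bits.set(0, 5);            // bits 0..4 set: {0, 1, 2, 3, 4}
        bits.clear(2);             // turn off bit 2: {0, 1, 3, 4}
        bits.clear(100);           // clearing past the current size is harmless
        System.out.println(bits);  // prints {0, 1, 3, 4}
    }
}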
From source file:org.dataconservancy.packaging.tool.integration.PackageGenerationTest.java
/**
 * Reads in any BagIt file that uses a ':' to delimit a keyword and value pair.
 *
 * @param bagItFile the file to read
 * @return a Map keyed by the keywords, with the List of values as they appear in the file
 * @throws IOException
 */
private Map<String, List<String>> parseBagItKeyValuesFile(File bagItFile) throws IOException {
    Map<String, List<String>> result = new HashMap<>();

    // Used to track state; a streams no-no. Probably should do this the old-fashioned way.
    BitSet bitSet = new BitSet(1);
    bitSet.set(0);
    StringBuilder key = new StringBuilder();

    Files.lines(bagItFile.toPath(), Charset.forName("UTF-8"))
            .flatMap(line -> Stream.of(line.substring(0, line.indexOf(":")),
                    line.substring(line.indexOf(":") + 1).trim()))
            .forEach(token -> {
                if (bitSet.get(0)) {
                    // key
                    key.delete(0, key.length());
                    result.putIfAbsent(token, new ArrayList<>());
                    key.append(token);
                    bitSet.clear(0);
                } else {
                    // value
                    result.get(key.toString()).add(token);
                    bitSet.set(0);
                }
            });

    return result;
}
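A note on the pattern: the single-bit BitSet serves as a mutable boolean that the lambda can capture (local variables referenced from a lambda must be effectively final, so a plain boolean flag would not compile). clear(0) switches the parser into value mode, and set(0) switches it back for the next key.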
From source file:hivemall.ftvec.ranking.BprSamplingUDTF.java
/**
 * Sampling pairs uniformly for each user without replacement. Sample a user, then sample a pair.
 *
 * Caution: This is not a perfect 'without replacement' but it does 'without replacement' for
 * positive feedbacks.
 */
private void uniformUserSamplingWithoutReplacement(@Nonnull final PositiveOnlyFeedback feedback,
        final int numSamples) throws HiveException {
    int numUsers = feedback.getNumUsers();
    if (numUsers == 0) {
        return;
    }
    final int maxItemId = feedback.getMaxItemId();
    if (maxItemId <= 0) {
        throw new HiveException("Invalid maxItemId: " + maxItemId);
    }
    final int numItems = maxItemId + 1;
    final BitSet userBits = new BitSet(numUsers);
    feedback.getUsers(userBits);

    final Random rand = new Random(31L);
    for (int i = 0; i < numSamples && numUsers > 0; i++) {
        int nthUser = rand.nextInt(numUsers);
        int user = BitUtils.indexOfSetBit(userBits, nthUser);
        if (user == -1) {
            throw new HiveException("Cannot find " + nthUser + "-th user among " + numUsers + " users");
        }

        IntArrayList posItems = feedback.getItems(user, true);
        assert (posItems != null) : user;
        int size = posItems.size();
        assert (size > 0) : size;
        if (size == numItems) { // cannot draw a negative item
            --i;
            continue;
        }

        int posItemIndex = rand.nextInt(size);
        int posItem = posItems.fastGet(posItemIndex);
        int negItem;
        do {
            negItem = rand.nextInt(maxItemId);
        } while (posItems.contains(negItem));

        posItems.remove(posItemIndex);
        if (posItems.isEmpty()) {
            feedback.removeFeedback(user);
            userBits.clear(user);
            --numUsers;
        }

        forward(user, posItem, negItem);
    }
}
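Here userBits is the live pool of users: once a user's positive items are exhausted, userBits.clear(user) removes that user from future draws. BitUtils.indexOfSetBit is Hivemall's helper; a minimal equivalent built only on java.util.BitSet.nextSetBit might look like this (a sketch, not the library's implementation):

import java.util.BitSet;

public class NthSetBit {
    // Returns the index of the n-th (0-based) set bit, or -1 if fewer than n+1 bits are set.
    static int indexOfSetBit(BitSet bits, int n) {
        int idx = -1;
        for (int i = 0; i <= n; i++) {
            idx = bits.nextSetBit(idx + 1);
            if (idx == -1)
                return -1;
        }
        return idx;
    }

    public static void main(String[] args) {
        BitSet users = new BitSet();
        users.set(3);
        users.set(7);
        users.set(12);
        System.out.println(indexOfSetBit(users, 1)); // 7
        users.clear(7);                              // user 7 exhausted; drops out of the pool
        System.out.println(indexOfSetBit(users, 1)); // 12
    }
}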
From source file:org.apache.cassandra.concurrent.LongSharedExecutorPoolTest.java
private void testPromptnessOfExecution(long intervalNanos, float loadIncrement)
        throws InterruptedException, ExecutionException {
    final int executorCount = 4;
    int threadCount = 8;
    int maxQueued = 1024;
    final WeibullDistribution workTime = new WeibullDistribution(3, 200000);
    final long minWorkTime = TimeUnit.MICROSECONDS.toNanos(1);
    final long maxWorkTime = TimeUnit.MILLISECONDS.toNanos(1);

    final int[] threadCounts = new int[executorCount];
    final WeibullDistribution[] workCount = new WeibullDistribution[executorCount];
    final ExecutorService[] executors = new ExecutorService[executorCount];
    for (int i = 0; i < executors.length; i++) {
        executors[i] = SharedExecutorPool.SHARED.newExecutor(threadCount, maxQueued, "test" + i, "test" + i);
        threadCounts[i] = threadCount;
        workCount[i] = new WeibullDistribution(2, maxQueued);
        threadCount *= 2;
        maxQueued *= 2;
    }

    long runs = 0;
    long events = 0;
    final TreeSet<Batch> pending = new TreeSet<>();
    final BitSet executorsWithWork = new BitSet(executorCount);
    long until = 0;
    // basic idea is to go through different levels of load on the executor service; initially is all small batches
    // (mostly within max queue size) of very short operations, moving to progressively larger batches
    // (beyond max queued size), and longer operations
    for (float multiplier = 0f; multiplier < 2.01f;) {
        if (System.nanoTime() > until) {
            System.out.println(String.format("Completed %.0fK batches with %.1fM events",
                    runs * 0.001f, events * 0.000001f));
            events = 0;
            until = System.nanoTime() + intervalNanos;
            multiplier += loadIncrement;
            System.out.println(String.format("Running for %ds with load multiplier %.1f",
                    TimeUnit.NANOSECONDS.toSeconds(intervalNanos), multiplier));
        }

        // wait a random amount of time so we submit new tasks in various stages of progress
        long timeout;
        if (pending.isEmpty())
            timeout = 0;
        else if (Math.random() > 0.98)
            timeout = Long.MAX_VALUE;
        else if (pending.size() == executorCount)
            timeout = pending.first().timeout;
        else
            timeout = (long) (Math.random() * pending.last().timeout);

        while (!pending.isEmpty() && timeout > System.nanoTime()) {
            Batch first = pending.first();
            boolean complete = false;
            try {
                for (Result result : first.results.descendingSet())
                    result.future.get(timeout - System.nanoTime(), TimeUnit.NANOSECONDS);
                complete = true;
            } catch (TimeoutException e) {
            }
            if (!complete && System.nanoTime() > first.timeout) {
                for (Result result : first.results)
                    if (!result.future.isDone())
                        throw new AssertionError();
                complete = true;
            }
            if (complete) {
                pending.pollFirst();
                executorsWithWork.clear(first.executorIndex);
            }
        }

        // if we've emptied the executors, give all our threads an opportunity to spin down
        if (timeout == Long.MAX_VALUE)
            Uninterruptibles.sleepUninterruptibly(10, TimeUnit.MILLISECONDS);

        // submit a random batch to the first free executor service
        int executorIndex = executorsWithWork.nextClearBit(0);
        if (executorIndex >= executorCount)
            continue;
        executorsWithWork.set(executorIndex);
        ExecutorService executor = executors[executorIndex];
        TreeSet<Result> results = new TreeSet<>();
        int count = (int) (workCount[executorIndex].sample() * multiplier);
        long targetTotalElapsed = 0;
        long start = System.nanoTime();
        long baseTime;
        if (Math.random() > 0.5)
            baseTime = 2 * (long) (workTime.sample() * multiplier);
        else
            baseTime = 0;
        for (int j = 0; j < count; j++) {
            long time;
            if (baseTime == 0)
                time = (long) (workTime.sample() * multiplier);
            else
                time = (long) (baseTime * Math.random());
            if (time < minWorkTime)
                time = minWorkTime;
            if (time > maxWorkTime)
                time = maxWorkTime;
            targetTotalElapsed += time;
            Future<?> future = executor.submit(new WaitTask(time));
            results.add(new Result(future, System.nanoTime() + time));
        }
        long end = start + (long) Math.ceil(targetTotalElapsed / (double) threadCounts[executorIndex])
                + TimeUnit.MILLISECONDS.toNanos(100L);
        long now = System.nanoTime();
        if (runs++ > executorCount && now > end)
            throw new AssertionError();
        events += results.size();
        pending.add(new Batch(results, end, executorIndex));
        // System.out.println(String.format("Submitted batch to executor %d with %d items and %d permitted millis", executorIndex, count, TimeUnit.NANOSECONDS.toMillis(end - start)));
    }
}
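In both this test and the next, the BitSet doubles as a slot allocator: nextClearBit(0) finds the first executor with no outstanding work, set marks it busy, and clear(first.executorIndex) releases it when its batch completes. The idiom in isolation (a sketch with illustrative names, not code from either project):

import java.util.BitSet;

class SlotAllocator {
    private final int slots;
    private final BitSet busy;

    SlotAllocator(int slots) {
        this.slots = slots;
        this.busy = new BitSet(slots);
    }

    int acquire() {                      // first free slot, or -1 if all are busy
        int slot = busy.nextClearBit(0);
        if (slot >= slots)
            return -1;
        busy.set(slot);
        return slot;
    }

    void release(int slot) {
        busy.clear(slot);
    }

    public static void main(String[] args) {
        SlotAllocator pool = new SlotAllocator(2);
        int a = pool.acquire();             // 0
        int b = pool.acquire();             // 1
        System.out.println(pool.acquire()); // -1: all busy
        pool.release(a);
        System.out.println(pool.acquire()); // 0 again
    }
}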
From source file:bes.injector.InjectorBurnTest.java
private void testPromptnessOfExecution(long intervalNanos, float loadIncrement)
        throws InterruptedException, ExecutionException, TimeoutException {
    final int executorCount = 4;
    int threadCount = 8;
    int maxQueued = 1024;
    final WeibullDistribution workTime = new WeibullDistribution(3, 200000);
    final long minWorkTime = TimeUnit.MICROSECONDS.toNanos(1);
    final long maxWorkTime = TimeUnit.MILLISECONDS.toNanos(1);

    final int[] threadCounts = new int[executorCount];
    final WeibullDistribution[] workCount = new WeibullDistribution[executorCount];
    final ExecutorService[] executors = new ExecutorService[executorCount];
    final Injector injector = new Injector("");
    for (int i = 0; i < executors.length; i++) {
        executors[i] = injector.newExecutor(threadCount, maxQueued);
        threadCounts[i] = threadCount;
        workCount[i] = new WeibullDistribution(2, maxQueued);
        threadCount *= 2;
        maxQueued *= 2;
    }

    long runs = 0;
    long events = 0;
    final TreeSet<Batch> pending = new TreeSet<Batch>();
    final BitSet executorsWithWork = new BitSet(executorCount);
    long until = 0;
    // basic idea is to go through different levels of load on the executor service; initially is all small batches
    // (mostly within max queue size) of very short operations, moving to progressively larger batches
    // (beyond max queued size), and longer operations
    for (float multiplier = 0f; multiplier < 2.01f;) {
        if (System.nanoTime() > until) {
            System.out.println(String.format("Completed %.0fK batches with %.1fM events",
                    runs * 0.001f, events * 0.000001f));
            events = 0;
            until = System.nanoTime() + intervalNanos;
            multiplier += loadIncrement;
            System.out.println(String.format("Running for %ds with load multiplier %.1f",
                    TimeUnit.NANOSECONDS.toSeconds(intervalNanos), multiplier));
        }

        // wait a random amount of time so we submit new tasks in various stages of progress
        long timeout;
        if (pending.isEmpty())
            timeout = 0;
        else if (Math.random() > 0.98)
            timeout = Long.MAX_VALUE;
        else if (pending.size() == executorCount)
            timeout = pending.first().timeout;
        else
            timeout = (long) (Math.random() * pending.last().timeout);

        while (!pending.isEmpty() && timeout > System.nanoTime()) {
            Batch first = pending.first();
            boolean complete = false;
            try {
                for (Result result : first.results.descendingSet())
                    result.future.get(timeout - System.nanoTime(), TimeUnit.NANOSECONDS);
                complete = true;
            } catch (TimeoutException e) {
            }
            if (!complete && System.nanoTime() > first.timeout) {
                for (Result result : first.results)
                    if (!result.future.isDone())
                        throw new AssertionError();
                complete = true;
            }
            if (complete) {
                pending.pollFirst();
                executorsWithWork.clear(first.executorIndex);
            }
        }

        // if we've emptied the executors, give all our threads an opportunity to spin down
        if (timeout == Long.MAX_VALUE) {
            try {
                Thread.sleep(10);
            } catch (InterruptedException e) {
            }
        }

        // submit a random batch to the first free executor service
        int executorIndex = executorsWithWork.nextClearBit(0);
        if (executorIndex >= executorCount)
            continue;
        executorsWithWork.set(executorIndex);
        ExecutorService executor = executors[executorIndex];
        TreeSet<Result> results = new TreeSet<Result>();
        int count = (int) (workCount[executorIndex].sample() * multiplier);
        long targetTotalElapsed = 0;
        long start = System.nanoTime();
        long baseTime;
        if (Math.random() > 0.5)
            baseTime = 2 * (long) (workTime.sample() * multiplier);
        else
            baseTime = 0;
        for (int j = 0; j < count; j++) {
            long time;
            if (baseTime == 0)
                time = (long) (workTime.sample() * multiplier);
            else
                time = (long) (baseTime * Math.random());
            if (time < minWorkTime)
                time = minWorkTime;
            if (time > maxWorkTime)
                time = maxWorkTime;
            targetTotalElapsed += time;
            Future<?> future = executor.submit(new WaitTask(time));
            results.add(new Result(future, System.nanoTime() + time));
        }
        long end = start + (long) Math.ceil(targetTotalElapsed / (double) threadCounts[executorIndex])
                + TimeUnit.MILLISECONDS.toNanos(100L);
        long now = System.nanoTime();
        if (runs++ > executorCount && now > end)
            throw new AssertionError();
        events += results.size();
        pending.add(new Batch(results, end, executorIndex));
        // System.out.println(String.format("Submitted batch to executor %d with %d items and %d permitted millis", executorIndex, count, TimeUnit.NANOSECONDS.toMillis(end - start)));
    }
}
From source file:gov.noaa.pfel.erddap.util.EDStatic.java
/**
 * This returns the Oceanic/Atmospheric Acronyms table as a Table:
 * col0=acronym, col1=fullName.
 * THIS IS ONLY FOR GenerateDatasetsXml THREADS -- a few common acronyms are removed.
 *
 * @return the oceanic/atmospheric variable names table with some common acronyms removed
 * @throws Exception if trouble (e.g., file not found)
 */
public static Table gdxAcronymsTable() throws Exception {
    if (gdxAcronymsTable == null) {
        Table table = oceanicAtmosphericAcronymsTable();
        StringArray acronymSA = (StringArray) (table.getColumn(0));
        StringArray fullNameSA = (StringArray) (table.getColumn(1));

        // remove some really common acronyms I don't want to expand
        BitSet keep = new BitSet();
        keep.set(0, acronymSA.size());
        String common[] = { //"DOC", "DOD", "DOE",
                "USDOC", "USDOD", "USDOE", "NOAA", "NASA", "US" };
        for (int c = 0; c < common.length; c++) {
            int po = acronymSA.indexOf(common[c]);
            if (po >= 0)
                keep.clear(po);
        }
        table.justKeep(keep);

        gdxAcronymsTable = table; // swap into place
    }
    return gdxAcronymsTable;
}
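The keep BitSet here is a row mask: start with every row kept, clear the indices to drop, then filter in one pass. table.justKeep(keep) is ERDDAP's helper; the same pattern over a plain List might look like this (a sketch with made-up data):

import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;

public class KeepMaskDemo {
    public static void main(String[] args) {
        List<String> rows = new ArrayList<>(List.of("NOAA", "CTD", "NASA", "SST"));

        BitSet keep = new BitSet();
        keep.set(0, rows.size());          // start by keeping everything
        keep.clear(rows.indexOf("NOAA"));  // drop acronyms we don't want expanded
        keep.clear(rows.indexOf("NASA"));

        List<String> kept = new ArrayList<>();
        for (int i = keep.nextSetBit(0); i >= 0; i = keep.nextSetBit(i + 1))
            kept.add(rows.get(i));
        System.out.println(kept);          // [CTD, SST]
    }
}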
From source file:android.support.v7.widget.StaggeredGridLayoutManager2.java
/**
 * Checks for gaps if we've reached to the top of the list.
 * <p>
 * Intermediate gaps created by full span items are tracked via mLaidOutInvalidFullSpan field.
 */
View hasGapsToFix() {
    int startChildIndex = 0;
    int endChildIndex = getChildCount() - 1;
    BitSet mSpansToCheck = new BitSet(mSpanCount);
    mSpansToCheck.set(0, mSpanCount, true);

    final int firstChildIndex, childLimit;
    final int preferredSpanDir = mOrientation == VERTICAL && isLayoutRTL() ? 1 : -1;
    if (mShouldReverseLayout) {
        firstChildIndex = endChildIndex - 1;
        childLimit = startChildIndex - 1;
    } else {
        firstChildIndex = startChildIndex;
        childLimit = endChildIndex;
    }
    final int nextChildDiff = firstChildIndex < childLimit ? 1 : -1;
    for (int i = firstChildIndex; i != childLimit; i += nextChildDiff) {
        View child = getChildAt(i);
        LayoutParams lp = (LayoutParams) child.getLayoutParams();
        if (mSpansToCheck.get(lp.mSpan.mIndex)) {
            if (checkSpanForGap(lp.mSpan)) {
                return child;
            }
            mSpansToCheck.clear(lp.mSpan.mIndex);
        }
        if (lp.mFullSpan) {
            continue; // quick reject
        }
        if (i + nextChildDiff != childLimit) {
            View nextChild = getChildAt(i + nextChildDiff);
            boolean compareSpans = false;
            if (mShouldReverseLayout) {
                // ensure child's end is below nextChild's end
                int myEnd = mPrimaryOrientation.getDecoratedEnd(child);
                int nextEnd = mPrimaryOrientation.getDecoratedEnd(nextChild);
                if (myEnd < nextEnd) {
                    return child; // i should have a better position
                } else if (myEnd == nextEnd) {
                    compareSpans = true;
                }
            } else {
                int myStart = mPrimaryOrientation.getDecoratedStart(child);
                int nextStart = mPrimaryOrientation.getDecoratedStart(nextChild);
                if (myStart > nextStart) {
                    return child; // i should have a better position
                } else if (myStart == nextStart) {
                    compareSpans = true;
                }
            }
            if (compareSpans) {
                // equal, check span indices.
                LayoutParams nextLp = (LayoutParams) nextChild.getLayoutParams();
                if (lp.mSpan.mIndex - nextLp.mSpan.mIndex < 0 != preferredSpanDir < 0) {
                    return child;
                }
            }
        }
    }
    // everything looks good
    return null;
}
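Here the BitSet acts as a "spans still to check" set: every span index starts set, and once a span has been examined, mSpansToCheck.clear(lp.mSpan.mIndex) records it as done, so the relatively expensive checkSpanForGap runs at most once per span while the loop keeps scanning children for out-of-order positions.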
From source file:org.apache.openjpa.kernel.StateManagerImpl.java
/**
 * Load the given field set from the data store into the instance.
 * Return true if any data is loaded, false otherwise.
 */
boolean loadFields(BitSet fields, FetchConfiguration fetch, int lockLevel, Object sdata) {
    // can't load version field from store
    if (fields != null) {
        FieldMetaData vfield = _meta.getVersionField();
        if (vfield != null)
            fields.clear(vfield.getIndex());
    }

    boolean ret = false;
    setLoading(true);
    try {
        // if any fields given, load them
        int len = (fields == null) ? 0 : fields.length();
        if (len > 0) {
            if (fetch == null)
                fetch = _broker.getFetchConfiguration();
            if (!_broker.getStoreManager().load(this, fields, fetch, lockLevel, sdata)) {
                throw new ObjectNotFoundException(_loc.get("del-instance", _meta.getDescribedType(), _oid))
                        .setFailedObject(getManagedInstance());
            }
            ret = true;
        }

        // make sure version information has been set; version info must
        // always be set after the first state load or set (which is why
        // we do this even if no fields were loaded -- could be that this
        // method is being called after a field is set)
        // If the _loadVersion field is null AND the version field has been loaded, skip calling sync version.
        // This indicates that the DB has a null value for the version column.
        FieldMetaData versionMeta = _meta != null ? _meta.getVersionField() : null;
        if (_loadVersion == null && (versionMeta != null && !_loaded.get(versionMeta.getIndex()))) {
            syncVersion(sdata);
            ret = ret || _loadVersion != null;
        }
    } finally {
        setLoading(false);
    }

    // see if the dfg is now loaded; do this regardless of whether we
    // loaded any fields, cause may already have been loaded by
    // StoreManager during initialization
    postLoad(-1, fetch);
    return ret;
}
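In OpenJPA the BitSet is a field mask indexed by field number, so fields.clear(vfield.getIndex()) surgically drops a single field (the version field, which cannot be loaded from the store) from the requested set before the mask is handed to the StoreManager.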
From source file:android.support.v7.widget.StaggeredGridLayoutManager.java
/**
 * Checks for gaps if we've reached to the top of the list.
 * <p>
 * Intermediate gaps created by full span items are tracked via mLaidOutInvalidFullSpan field.
 */
View hasGapsToFix() {
    int startChildIndex = 0;
    int endChildIndex = getChildCount() - 1;
    BitSet mSpansToCheck = new BitSet(mSpanCount);
    mSpansToCheck.set(0, mSpanCount, true);

    final int firstChildIndex, childLimit;
    final int preferredSpanDir = mOrientation == VERTICAL && isLayoutRTL() ? 1 : -1;
    if (mShouldReverseLayout) {
        firstChildIndex = endChildIndex;
        childLimit = startChildIndex - 1;
    } else {
        firstChildIndex = startChildIndex;
        childLimit = endChildIndex + 1;
    }
    final int nextChildDiff = firstChildIndex < childLimit ? 1 : -1;
    for (int i = firstChildIndex; i != childLimit; i += nextChildDiff) {
        View child = getChildAt(i);
        LayoutParams lp = (LayoutParams) child.getLayoutParams();
        if (mSpansToCheck.get(lp.mSpan.mIndex)) {
            if (checkSpanForGap(lp.mSpan)) {
                return child;
            }
            mSpansToCheck.clear(lp.mSpan.mIndex);
        }
        if (lp.mFullSpan) {
            continue; // quick reject
        }
        if (i + nextChildDiff != childLimit) {
            View nextChild = getChildAt(i + nextChildDiff);
            boolean compareSpans = false;
            if (mShouldReverseLayout) {
                // ensure child's end is below nextChild's end
                int myEnd = mPrimaryOrientation.getDecoratedEnd(child);
                int nextEnd = mPrimaryOrientation.getDecoratedEnd(nextChild);
                if (myEnd < nextEnd) {
                    return child; // i should have a better position
                } else if (myEnd == nextEnd) {
                    compareSpans = true;
                }
            } else {
                int myStart = mPrimaryOrientation.getDecoratedStart(child);
                int nextStart = mPrimaryOrientation.getDecoratedStart(nextChild);
                if (myStart > nextStart) {
                    return child; // i should have a better position
                } else if (myStart == nextStart) {
                    compareSpans = true;
                }
            }
            if (compareSpans) {
                // equal, check span indices.
                LayoutParams nextLp = (LayoutParams) nextChild.getLayoutParams();
                if (lp.mSpan.mIndex - nextLp.mSpan.mIndex < 0 != preferredSpanDir < 0) {
                    return child;
                }
            }
        }
    }
    // everything looks good
    return null;
}
From source file:org.apache.drill.exec.planner.logical.partition.PruneScanRule.java
protected void doOnMatch(RelOptRuleCall call, Filter filterRel, Project projectRel, TableScan scanRel) {
    final String pruningClassName = getClass().getName();
    logger.info("Beginning partition pruning, pruning class: {}", pruningClassName);
    Stopwatch totalPruningTime = Stopwatch.createStarted();

    final PlannerSettings settings = PrelUtil.getPlannerSettings(call.getPlanner());
    PartitionDescriptor descriptor = getPartitionDescriptor(settings, scanRel);
    final BufferAllocator allocator = optimizerContext.getAllocator();

    final Object selection = getDrillTable(scanRel).getSelection();
    MetadataContext metaContext = null;
    if (selection instanceof FormatSelection) {
        metaContext = ((FormatSelection) selection).getSelection().getMetaContext();
    }

    RexNode condition = null;
    if (projectRel == null) {
        condition = filterRel.getCondition();
    } else {
        // get the filter as if it were below the projection.
        condition = RelOptUtil.pushFilterPastProject(filterRel.getCondition(), projectRel);
    }

    RewriteAsBinaryOperators visitor = new RewriteAsBinaryOperators(true, filterRel.getCluster().getRexBuilder());
    condition = condition.accept(visitor);

    Map<Integer, String> fieldNameMap = Maps.newHashMap();
    List<String> fieldNames = scanRel.getRowType().getFieldNames();
    BitSet columnBitset = new BitSet();
    BitSet partitionColumnBitSet = new BitSet();
    Map<Integer, Integer> partitionMap = Maps.newHashMap();
    int relColIndex = 0;
    for (String field : fieldNames) {
        final Integer partitionIndex = descriptor.getIdIfValid(field);
        if (partitionIndex != null) {
            fieldNameMap.put(partitionIndex, field);
            partitionColumnBitSet.set(partitionIndex);
            columnBitset.set(relColIndex);
            // mapping between the relColIndex and partitionIndex
            partitionMap.put(relColIndex, partitionIndex);
        }
        relColIndex++;
    }

    if (partitionColumnBitSet.isEmpty()) {
        logger.info("No partition columns are projected from the scan..continue. "
                + "Total pruning elapsed time: {} ms", totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
        setPruneStatus(metaContext, PruneStatus.NOT_PRUNED);
        return;
    }

    // stop watch to track how long we spend in different phases of pruning
    Stopwatch miscTimer = Stopwatch.createUnstarted();

    // track how long we spend building the filter tree
    miscTimer.start();

    FindPartitionConditions c = new FindPartitionConditions(columnBitset, filterRel.getCluster().getRexBuilder());
    c.analyze(condition);
    RexNode pruneCondition = c.getFinalCondition();
    BitSet referencedDirsBitSet = c.getReferencedDirs();

    logger.info("Total elapsed time to build and analyze filter tree: {} ms",
            miscTimer.elapsed(TimeUnit.MILLISECONDS));
    miscTimer.reset();

    if (pruneCondition == null) {
        logger.info("No conditions were found eligible for partition pruning."
                + "Total pruning elapsed time: {} ms", totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
        setPruneStatus(metaContext, PruneStatus.NOT_PRUNED);
        return;
    }

    // set up the partitions
    List<PartitionLocation> newPartitions = Lists.newArrayList();
    long numTotal = 0; // total number of partitions
    int batchIndex = 0;
    PartitionLocation firstLocation = null;
    LogicalExpression materializedExpr = null;
    String[] spInfo = null;
    int maxIndex = -1;
    BitSet matchBitSet = new BitSet();

    // Outer loop: iterate over a list of batches of PartitionLocations
    for (List<PartitionLocation> partitions : descriptor) {
        numTotal += partitions.size();
        logger.debug("Evaluating partition pruning for batch {}", batchIndex);
        if (batchIndex == 0) { // save the first location in case everything is pruned
            firstLocation = partitions.get(0);
        }
        final NullableBitVector output = new NullableBitVector(
                MaterializedField.create("", Types.optional(MinorType.BIT)), allocator);
        final VectorContainer container = new VectorContainer();

        try {
            final ValueVector[] vectors = new ValueVector[descriptor.getMaxHierarchyLevel()];
            for (int partitionColumnIndex : BitSets.toIter(partitionColumnBitSet)) {
                SchemaPath column = SchemaPath.getSimplePath(fieldNameMap.get(partitionColumnIndex));
                MajorType type = descriptor.getVectorType(column, settings);
                MaterializedField field = MaterializedField.create(column.getAsUnescapedPath(), type);
                ValueVector v = TypeHelper.getNewVector(field, allocator);
                v.allocateNew();
                vectors[partitionColumnIndex] = v;
                container.add(v);
            }

            // track how long we spend populating partition column vectors
            miscTimer.start();

            // populate partition vectors.
            descriptor.populatePartitionVectors(vectors, partitions, partitionColumnBitSet, fieldNameMap);

            logger.info("Elapsed time to populate partitioning column vectors: {} ms within batchIndex: {}",
                    miscTimer.elapsed(TimeUnit.MILLISECONDS), batchIndex);
            miscTimer.reset();

            // materialize the expression; only need to do this once
            if (batchIndex == 0) {
                materializedExpr = materializePruneExpr(pruneCondition, settings, scanRel, container);
                if (materializedExpr == null) {
                    // continue without partition pruning; no need to log anything here since
                    // materializePruneExpr logs it already
                    logger.info("Total pruning elapsed time: {} ms",
                            totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
                    setPruneStatus(metaContext, PruneStatus.NOT_PRUNED);
                    return;
                }
            }

            output.allocateNew(partitions.size());

            // start the timer to evaluate how long we spend in the interpreter evaluation
            miscTimer.start();

            InterpreterEvaluator.evaluate(partitions.size(), optimizerContext, container, output, materializedExpr);

            logger.info("Elapsed time in interpreter evaluation: {} ms within batchIndex: {} with # of partitions : {}",
                    miscTimer.elapsed(TimeUnit.MILLISECONDS), batchIndex, partitions.size());
            miscTimer.reset();

            int recordCount = 0;
            int qualifiedCount = 0;

            if (descriptor.supportsMetadataCachePruning() && partitions.get(0)
                    .isCompositePartition() /* apply single partition check only for composite partitions */) {
                // Inner loop: within each batch iterate over the PartitionLocations
                for (PartitionLocation part : partitions) {
                    assert part.isCompositePartition();
                    if (!output.getAccessor().isNull(recordCount) && output.getAccessor().get(recordCount) == 1) {
                        newPartitions.add(part);
                        // Rather than using the PartitionLocation, get the array of partition values for the directories that are
                        // referenced by the filter since we are not interested in directory references in other parts of the query.
                        Pair<String[], Integer> p = composePartition(referencedDirsBitSet, partitionMap, vectors, recordCount);
                        String[] parts = p.getLeft();
                        int tmpIndex = p.getRight();
                        maxIndex = Math.max(maxIndex, tmpIndex);
                        if (spInfo == null) { // initialization
                            spInfo = parts;
                            for (int j = 0; j <= tmpIndex; j++) {
                                if (parts[j] != null) {
                                    matchBitSet.set(j);
                                }
                            }
                        } else {
                            // compare the new partition with existing partition
                            for (int j = 0; j <= tmpIndex; j++) {
                                if (parts[j] == null || spInfo[j] == null) { // nulls don't match
                                    matchBitSet.clear(j);
                                } else {
                                    if (!parts[j].equals(spInfo[j])) {
                                        matchBitSet.clear(j);
                                    }
                                }
                            }
                        }
                        qualifiedCount++;
                    }
                    recordCount++;
                }
            } else {
                // Inner loop: within each batch iterate over the PartitionLocations
                for (PartitionLocation part : partitions) {
                    if (!output.getAccessor().isNull(recordCount) && output.getAccessor().get(recordCount) == 1) {
                        newPartitions.add(part);
                        qualifiedCount++;
                    }
                    recordCount++;
                }
            }
            logger.debug("Within batch {}: total records: {}, qualified records: {}",
                    batchIndex, recordCount, qualifiedCount);
            batchIndex++;
        } catch (Exception e) {
            logger.warn("Exception while trying to prune partition.", e);
            logger.info("Total pruning elapsed time: {} ms", totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
            setPruneStatus(metaContext, PruneStatus.NOT_PRUNED);
            return; // continue without partition pruning
        } finally {
            container.clear();
            if (output != null) {
                output.clear();
            }
        }
    }

    try {
        if (newPartitions.size() == numTotal) {
            logger.info("No partitions were eligible for pruning");
            return;
        }

        // handle the case all partitions are filtered out.
        boolean canDropFilter = true;
        boolean wasAllPartitionsPruned = false;
        String cacheFileRoot = null;

        if (newPartitions.isEmpty()) {
            assert firstLocation != null;
            // Add the first non-composite partition location, since execution requires schema.
            // In such case, we should not drop filter.
            newPartitions.add(firstLocation.getPartitionLocationRecursive().get(0));
            canDropFilter = false;
            // NOTE: with DRILL-4530, the PruneScanRule may be called with only a list of
            // directories first and the non-composite partition location will still return
            // directories, not files. So, additional processing is done depending on this flag
            wasAllPartitionsPruned = true;
            logger.info("All {} partitions were pruned; added back a single partition to allow creating a schema", numTotal);

            // set the cacheFileRoot appropriately
            if (firstLocation.isCompositePartition()) {
                cacheFileRoot = descriptor.getBaseTableLocation() + firstLocation.getCompositePartitionPath();
            }
        }

        logger.info("Pruned {} partitions down to {}", numTotal, newPartitions.size());

        List<RexNode> conjuncts = RelOptUtil.conjunctions(condition);
        List<RexNode> pruneConjuncts = RelOptUtil.conjunctions(pruneCondition);
        conjuncts.removeAll(pruneConjuncts);
        RexNode newCondition = RexUtil.composeConjunction(filterRel.getCluster().getRexBuilder(), conjuncts, false);

        RewriteCombineBinaryOperators reverseVisitor = new RewriteCombineBinaryOperators(true,
                filterRel.getCluster().getRexBuilder());

        condition = condition.accept(reverseVisitor);
        pruneCondition = pruneCondition.accept(reverseVisitor);

        if (descriptor.supportsMetadataCachePruning() && !wasAllPartitionsPruned) {
            // if metadata cache file could potentially be used, then assign a proper cacheFileRoot
            int index = -1;
            if (!matchBitSet.isEmpty()) {
                String path = "";
                index = matchBitSet.length() - 1;

                for (int j = 0; j < matchBitSet.length(); j++) {
                    if (!matchBitSet.get(j)) {
                        // stop at the first index with no match and use the immediate
                        // previous index
                        index = j - 1;
                        break;
                    }
                }
                for (int j = 0; j <= index; j++) {
                    path += "/" + spInfo[j];
                }
                cacheFileRoot = descriptor.getBaseTableLocation() + path;
            }
            if (index != maxIndex) {
                // if multiple partitions are being selected, we should not drop the filter
                // since we are reading the cache file at a parent/ancestor level
                canDropFilter = false;
            }
        }

        RelNode inputRel = descriptor.supportsMetadataCachePruning()
                ? descriptor.createTableScan(newPartitions, cacheFileRoot, wasAllPartitionsPruned, metaContext)
                : descriptor.createTableScan(newPartitions, wasAllPartitionsPruned);

        if (projectRel != null) {
            inputRel = projectRel.copy(projectRel.getTraitSet(), Collections.singletonList(inputRel));
        }

        if (newCondition.isAlwaysTrue() && canDropFilter) {
            call.transformTo(inputRel);
        } else {
            final RelNode newFilter = filterRel.copy(filterRel.getTraitSet(), Collections.singletonList(inputRel));
            call.transformTo(newFilter);
        }

        setPruneStatus(metaContext, PruneStatus.PRUNED);
    } catch (Exception e) {
        logger.warn("Exception while using the pruned partitions.", e);
    } finally {
        logger.info("Total pruning elapsed time: {} ms", totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
    }
}
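Three BitSets play distinct roles above: columnBitset and partitionColumnBitSet mark which columns participate in pruning, while matchBitSet starts with every directory level of the first qualifying partition marked and is narrowed with clear(j) whenever a later qualifying partition disagrees at level j, leaving the directory prefix common to all selected partitions (used to choose cacheFileRoot). A minimal sketch of that narrowing idiom, with made-up data and names (not Drill's composePartition logic):

import java.util.BitSet;

public class CommonPrefixDemo {
    public static void main(String[] args) {
        String[][] paths = { { "2016", "01", "a" }, { "2016", "01", "b" }, { "2016", "02", "c" } };
        String[] first = paths[0];

        // start with all levels matching, then clear a level whenever any path disagrees
        BitSet match = new BitSet();
        match.set(0, first.length);
        for (String[] p : paths)
            for (int j = 0; j < first.length; j++)
                if (!first[j].equals(p[j]))
                    match.clear(j);

        // the shared prefix ends at the first cleared bit
        int end = match.nextClearBit(0);   // here: 1, so only "2016" is shared
        StringBuilder prefix = new StringBuilder();
        for (int j = 0; j < end; j++)
            prefix.append('/').append(first[j]);
        System.out.println(prefix);        // /2016
    }
}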
From source file:gov.noaa.pfel.erddap.dataset.EDDTableFromNcFiles.java
/** NOT FOR GENERAL USE. Bob uses this to consolidate the individual GTSPP * data files into 30 x 30 x 1 month files (tiles). * 30 x 30 leads to 12x6=72 files for a given time point, so a request * for a short time but entire world opens ~72 files. * There are ~240 months worth of data, so a request for a small lon lat * range for all time opens ~240 files.//from w w w .j a va2 s . c om * * <p>Why tile? Because there are ~10^6 profiles/year now, so ~10^7 total. * And if 100 bytes of info per file for EDDTableFromFiles fileTable, that's 1 GB!. * So there needs to be fewer files. * We want to balance number of files for 1 time point (all region tiles), * and number of time point files (I'll stick with their use of 1 month). * The tiling size selected is ok, but searches for single profile (by name) * are slow since a given file may have a wide range of station_ids. * * <p>Quality flags * <br>https://www.nodc.noaa.gov/GTSPP/document/qcmans/GTSPP_RT_QC_Manual_20090916.pdf * <br>http://www.ifremer.fr/gosud/formats/gtspp_qcflags.htm * <br>CODE SIGNIFICATION * <br>0 NOT CONTROLLED VALUE * <br>1 CORRECT VALUE * <br>2 VALUE INCONSISTENT WITH STATISTICS * <br>3 DOUBTFUL VALUE (spike, ...) * <br>4 FALSE VALUE (out of scale, constant profile, vertical instability, ...) * <br>5 VALUE MODIFIED DURING QC (only for interpolate location or date) * <br>6-8 Not USED * <br>9 NO VALUE * <br> * <br>I interpret as: okay values are 1, 2, 5 * * @param firstYear e.g., 1990 * @param firstMonth e.g., 1 (1..) * @param lastYear e.g., 2010 * @param lastMonth e.g., 12 (1..) * @param testMode if true, this just processes .nc files * already in testTempDir f:/data/gtspp/testTemp/ * and puts results in testDestDir f:/data/gtspp/testDest/. * So the first/last/Year/Month params are ignored. */ public static void bobConsolidateGtsppTgz(int firstYear, int firstMonth, int lastYear, int lastMonth, boolean testMode) throws Throwable { int chunkSize = 45; //lon width, lat height of a tile, in degrees int minLat = -90; int maxLat = 90; int minLon = -180; int maxLon = 180; String today = Calendar2.getCurrentISODateTimeStringZulu().substring(0, 10); //to nearest day String sevenZip = "c:\\progra~1\\7-Zip\\7z"; String zipDir = "c:\\data\\gtspp\\bestNcZip\\"; //gtspp_at199001.tgz String destDir = "c:\\data\\gtspp\\bestNcConsolidated\\"; String tempDir = "c:\\data\\gtspp\\temp\\"; String testTempDir = "c:\\data\\gtspp\\testTemp\\"; //tempDir if testMode=true String testDestDir = "c:\\data\\gtspp\\testDest\\"; //destDir if testMode=true String logFile = "c:\\data\\gtspp\\log" + String2.replaceAll(today, "-", "") + ".txt"; File2.makeDirectory(tempDir); //https://www.nodc.noaa.gov/GTSPP/document/qcmans/qcflags.htm //1=correct, 2=probably correct, 5=modified (so now correct) //pre 2012-04-15 was {1,2,5} //pre 2012-05-25 was {1,2} int okQF[] = { 1, 2, 5 }; String okQFCsv = String2.toCSSVString(okQF); float depthMV = 99999; //was -99; float temperatureMV = 99999; //was -99; float salinityMV = 99999; //was -99; int qMV = 9; String timeUnits = "days since 1900-01-01 00:00:00"; //causes roundoff error(!) 
double timeBaseAndFactor[] = Calendar2.getTimeBaseAndFactor(timeUnits); //impossible values: float minDepth = -0.4f, maxDepth = 10000; //-0.4 allows for imprecise values float minTemperature = -4, maxTemperature = 40; float minSalinity = 0, maxSalinity = 41; if (testMode) { firstYear = 1990; firstMonth = 1; lastYear = 1990; lastMonth = 1; } SSR.verbose = false; String2.setupLog(true, false, logFile, false, 1000000000); String2.log("*** starting bobConsolidateGtsppTgz " + Calendar2.getCurrentISODateTimeStringLocalTZ() + "\n" + "logFile=" + String2.logFileName() + "\n" + String2.standardHelpAboutMessage()); long elapsedTime = System.currentTimeMillis(); //q_pos (position quality flag), q_date_time (time quality flag) int stationCol = -1, organizationCol = -1, dataTypeCol = -1, platformCol = -1, cruiseCol = -1, longitudeCol = -1, latitudeCol = -1, timeCol = -1, depthCol = -1, temperatureCol = -1, salinityCol = -1; int totalNGoodStation = 0, totalNGoodPos = 0, totalNGoodTime = 0, totalNGoodDepth = 0, totalNGoodTemperature = 0, totalNGoodSalinity = 0; int totalNBadStation = 0, totalNBadPos = 0, totalNBadTime = 0, totalNBadDepth = 0, totalNBadTemperature = 0, totalNBadSalinity = 0, totalNWarnings = 0, totalNExceptions = 0; long totalNGoodRows = 0, totalNBadRows = 0; StringArray impossibleNanLat = new StringArray(); StringArray impossibleMinLat = new StringArray(); StringArray impossibleMaxLat = new StringArray(); StringArray impossibleNanLon = new StringArray(); StringArray impossibleMinLon = new StringArray(); StringArray impossibleMaxLon = new StringArray(); //StringArray impossibleNaNDepth = new StringArray(); StringArray impossibleMinDepth = new StringArray(); StringArray impossibleMaxDepth = new StringArray(); //StringArray impossibleNanTemperature = new StringArray(); StringArray impossibleMinTemperature = new StringArray(); StringArray impossibleMaxTemperature = new StringArray(); //StringArray impossibleNanSalinity = new StringArray(); StringArray impossibleMinSalinity = new StringArray(); StringArray impossibleMaxSalinity = new StringArray(); int nLons = 0, nLats = 0, nFiles = 0; int lonSum = 0, latSum = 0; long profilesSum = 0; long rowsSum = 0; //*** process a month's data int year = firstYear; int month = firstMonth; long chunkTime = System.currentTimeMillis(); while (year <= lastYear) { String2.log("\n*** " + Calendar2.getCurrentISODateTimeStringLocalTZ() + " start processing year=" + year + " month=" + month); String zMonth = String2.zeroPad("" + month, 2); String zMonth1 = String2.zeroPad("" + (month + 1), 2); double minEpochSeconds = Calendar2.isoStringToEpochSeconds(year + "-" + zMonth + "-01"); double maxEpochSeconds = Calendar2.isoStringToEpochSeconds(year + "-" + zMonth1 + "-01"); //destination directory String tDestDir = testMode ? 
testDestDir : destDir + year + "\\" + zMonth + "\\"; File2.makeDirectory(tDestDir); HashMap tableHashMap = new HashMap(); //make sure all files are deleted int waitSeconds = 2; int nAttempts = 10; long cmdTime = System.currentTimeMillis(); String cmd = "del/q " + tDestDir + "*.*"; for (int attempt = 0; attempt < nAttempts; attempt++) { if (attempt % 8 == 0) { String2.log(cmd); SSR.dosShell(cmd, 30 * 60); //10 minutes*60 seconds //File2.deleteAllFiles(tempDir); //previous method } Math2.gc(waitSeconds * 1000); //gtspp: give OS time to settle File destDirFile = new File(tDestDir); File files[] = destDirFile.listFiles(); String2.log(" nRemainingFiles=" + files.length); if (files.length == 0) break; waitSeconds = 2 * nAttempts; } String2.log(" cmd total time=" + Calendar2.elapsedTimeString(System.currentTimeMillis() - cmdTime)); //unzip all atlantic, indian, and pacific .zip files for that month String region2[] = { "at", "in", "pa" }; int nRegions = testMode ? 1 : 3; for (int region = 0; region < nRegions; region++) { String sourceBaseName = "gtspp4_" + region2[region] + year + zMonth; String sourceZipJustFileName = sourceBaseName + ".tgz"; String sourceZipName = zipDir + sourceZipJustFileName; if (!testMode) { //delete all files in tempDir waitSeconds = 2; nAttempts = 10; cmdTime = System.currentTimeMillis(); cmd = "del/q " + tempDir + "*.*"; String2.log(""); //blank line for (int attempt = 0; attempt < nAttempts; attempt++) { String2.log(cmd); SSR.dosShell(cmd, 30 * 60); //30 minutes*60 seconds //File2.deleteAllFiles(tempDir); //previous method //delete dirs too File2.deleteAllFiles(tempDir, true, true); Math2.gc(waitSeconds * 1000); //gtspp: give OS time to settle String2.log(" " + Math2.memoryString()); File tempDirFile = new File(tempDir); File files[] = tempDirFile.listFiles(); String2.log(" nRemainingFiles=" + files.length); if (files.length == 0) break; waitSeconds = 2 * nAttempts; } String2.log(" cmd total time=" + Calendar2.elapsedTimeString(System.currentTimeMillis() - cmdTime)); //unzip file into tempDir //gtspp_at199001.zip cmd = sevenZip + " -y e " + sourceZipName + " -o" + tempDir + " -r"; cmdTime = System.currentTimeMillis(); String2.log("\n*** " + cmd); if (File2.isFile(sourceZipName)) { try { SSR.dosShell(cmd, 30 * 60); //10 minutes*60 seconds String2.log(" cmd time=" + Calendar2.elapsedTimeString(System.currentTimeMillis() - cmdTime)); //extract from the .tar file //gtspp4_at199001.tar cmd = sevenZip + " -y e " + tempDir + sourceBaseName + ".tar -o" + tempDir + " -r"; cmdTime = System.currentTimeMillis(); String2.log("\n*** " + cmd); SSR.dosShell(cmd, 120 * 60); //120 minutes*60 seconds String2.log(" cmd time=" + Calendar2.elapsedTimeString(System.currentTimeMillis() - cmdTime)); } catch (Exception e) { String2.log("Caught exception: " + MustBe.throwableToString(e)); } } //previous method //SSR.unzip(sourceZipName, // tempDir, true, 100 * 60, null); //ignoreZipDirectories, timeOutSeconds 100 minutes } //read each file and put data in proper table String tTempDir = testMode ? testTempDir : tempDir; File tTempDirAsFile = new File(tTempDir); String sourceFileNames[] = tTempDirAsFile.list(); //just the file names String2.log("\nunzipped " + sourceFileNames.length + " files"); int nSourceFileNames = //testMode? 
100 : sourceFileNames.length; int nGoodStation = 0, nGoodPos = 0, nGoodTime = 0, nGoodDepth = 0, nGoodTemperature = 0, nGoodSalinity = 0, nGoodRows = 0; int nBadStation = 0, nBadPos = 0, nBadTime = 0, nBadDepth = 0, nBadTemperature = 0, nBadSalinity = 0, nBadRows = 0, nWarnings = 0, nExceptions = 0; long fileReadTime = System.currentTimeMillis(); profilesSum += nSourceFileNames; for (int sfi = 0; sfi < nSourceFileNames; sfi++) { String sourceFileName = sourceFileNames[sfi]; if (sfi % 10000 == 0) { //if (sfi > 0) //2012-12-13 commented out. Let Java handle it. // Math2.gc(3 * 1000); //gtspp: give OS time to settle //high water mark is ~160 MB, so memory not a problem String2.log("file #" + sfi + " " + Math2.memoryString()); } if (!sourceFileName.endsWith(".nc")) { //String2.log("ERROR: not a .nc file: " + sourceFileName); continue; } NetcdfFile ncFile = null; try { //get the station name //gtspp_13635162_te_111.nc gtspp_10313692_cu_111.nc if (!sourceFileName.matches("gtspp_[0-9]+_.*\\.nc")) { //was "\\d+")) {//all digits nBadStation++; throw new SimpleException("Invalid sourceFileName=" + sourceFileName); } int po = sourceFileName.indexOf('_', 6); if (po < 0) { nBadStation++; throw new SimpleException("Invalid sourceFileName=" + sourceFileName); } int station = String2.parseInt(sourceFileName.substring(6, po)); nGoodStation++; String key = sourceZipJustFileName + " " + sourceFileName; //open the file ncFile = NcHelper.openFile(tTempDir + sourceFileName); Variable var; Attributes tVarAtts = new Attributes(); String tUnits; //get all of the data //stream_ident var = ncFile.findVariable("stream_ident"); String organization = ""; String dataType = ""; if (var == null) { nWarnings++; String2.log("WARNING: No stream_ident in " + sourceFileName); } else { PrimitiveArray streamPA = NcHelper.getPrimitiveArray(var); if (streamPA instanceof StringArray && streamPA.size() > 0) { String stream = streamPA.getString(0); if (stream.length() >= 4) { organization = stream.substring(0, 2).trim(); dataType = stream.substring(2, 4).trim(); } else { String2.log("WARNING: stream_ident isn't a 4 char string: " + stream); } } else { String2.log("WARNING: stream_ident isn't a StringArray: " + streamPA.toString()); } } //platform_code var = ncFile.findVariable("gtspp_platform_code"); String platform = ""; if (var == null) { //a small percentage have this problem //nWarnings++; //String2.log("WARNING: No gtspp_platform_code in " + sourceFileName); } else { PrimitiveArray pa = NcHelper.getPrimitiveArray(var); if (pa instanceof StringArray && pa.size() > 0) { platform = pa.getString(0).trim(); //String2.log("platform_code=" + platform_code); } else { String2.log("WARNING: gtspp_platform_code isn't a StringArray: " + pa.toString()); } } //cruise var = ncFile.findVariable("cruise_id"); String cruise = ""; if (var == null) { nWarnings++; String2.log("WARNING: No cruise_id in " + sourceFileName); } else { PrimitiveArray cruisePA = NcHelper.getPrimitiveArray(var); if (cruisePA instanceof StringArray && cruisePA.size() > 0) { cruise = cruisePA.getString(0).trim(); } else { String2.log("WARNING: cruise_id isn't a StringArray: " + cruisePA.toString()); } } //prof_type is TEMP or PSAL so don't save it. 
/*var = ncFile.findVariable("prof_type"); String prof_type = ""; if (var == null) { nWarnings++; String2.log("WARNING: No prof_type in " + sourceFileName); } else { PrimitiveArray pa = NcHelper.getPrimitiveArray(var); if (pa instanceof StringArray && pa.size() > 0) { prof_type = pa.getString(0).trim(); String2.log("prof_type=" + prof_type); } else { String2.log("WARNING: prof_type isn't a StringArray: " + pa.toString()); } }*/ //position quality flag var = ncFile.findVariable("position_quality_flag"); //was "q_pos"); if (var == null) { nWarnings++; String2.log("WARNING: No position_quality_flag in " + sourceFileName); } else { PrimitiveArray q_pos = NcHelper.getPrimitiveArray(var); if (!(q_pos instanceof IntArray) || q_pos.size() != 1) throw new SimpleException("Invalid position_quality_flag=" + q_pos); int ti = q_pos.getInt(0); if (String2.indexOf(okQF, ti) < 0) { nBadPos++; continue; } //nGoodPos++; is below } //time quality flag var = ncFile.findVariable("time_quality_flag"); //q_date_time"); if (var == null) { nWarnings++; String2.log("WARNING: No time_quality_flag in " + sourceFileName); } else { PrimitiveArray q_date_time = NcHelper.getPrimitiveArray(var); if (!(q_date_time instanceof IntArray) || q_date_time.size() != 1) throw new SimpleException("Invalid time_quality_flag=" + q_date_time); int ti = q_date_time.getInt(0); if (String2.indexOf(okQF, ti) < 0) { nBadTime++; continue; } //nGoodTime is below } //time var = ncFile.findVariable("time"); if (var == null) throw new SimpleException("No time!"); tVarAtts.clear(); NcHelper.getVariableAttributes(var, tVarAtts); tUnits = tVarAtts.getString("units"); if (!timeUnits.equals(tUnits)) throw new SimpleException("Invalid time units=" + tUnits); PrimitiveArray time = NcHelper.getPrimitiveArray(var); if (!(time instanceof DoubleArray) || time.size() != 1) throw new SimpleException("Invalid time=" + time); double tTime = Calendar2.unitsSinceToEpochSeconds(timeBaseAndFactor[0], timeBaseAndFactor[1], time.getDouble(0)); if (tTime < minEpochSeconds || tTime > maxEpochSeconds) throw new SimpleException( "Invalid tTime=" + Calendar2.safeEpochSecondsToIsoStringTZ(tTime, "")); //original times (that I looked at) are to nearest second //so round to nearest second (fix .99999 problems) tTime = Math.rint(tTime); nGoodTime++; //longitude (position qFlag is good) var = ncFile.findVariable("longitude"); if (var == null) { impossibleNanLon.add(key + " lon=null"); continue; } PrimitiveArray longitude = NcHelper.getPrimitiveArray(var); if (!(longitude instanceof FloatArray) || longitude.size() != 1) { impossibleNanLon.add(key + " lon=wrongTypeOrSize"); continue; } float lon = longitude.getFloat(0); if (Float.isNaN(lon)) { impossibleNanLon.add(key + " lon=NaN"); continue; } else if (lon < minLon) { impossibleMinLon.add(key + " lon=" + lon); //fall through } else if (lon > maxLon) { impossibleMaxLon.add(key + " lon=" + lon); //fall through } lon = (float) Math2.anglePM180(lon); //latitude (position qFlag is good) var = ncFile.findVariable("latitude"); if (var == null) { impossibleNanLat.add(key + " lat=null"); continue; } PrimitiveArray latitude = NcHelper.getPrimitiveArray(var); if (!(latitude instanceof FloatArray) || latitude.size() != 1) { impossibleNanLat.add(key + " lat=wrongTypeOrSize"); continue; } float lat = latitude.getFloat(0); if (Float.isNaN(lat)) { impossibleNanLat.add(key + " lat=NaN"); continue; } else if (lat < minLat) { impossibleMinLat.add(key + " lat=" + lat); continue; } else if (lat > maxLat) { impossibleMaxLat.add(key + " lat=" + 
lat); continue; } nGoodPos++; //depth var = ncFile.findVariable("z"); if (var == null) throw new SimpleException("No z!"); PrimitiveArray depth = NcHelper.getPrimitiveArray(var); if (!(depth instanceof FloatArray) || depth.size() == 0) throw new SimpleException("Invalid z=" + depth); int nDepth = depth.size(); //DEPH_qparm var = ncFile.findVariable("z_variable_quality_flag"); //DEPH_qparm"); if (var == null) throw new SimpleException("No z_variable_quality_flag!"); PrimitiveArray DEPH_qparm = NcHelper.getPrimitiveArray(var); if (!(DEPH_qparm instanceof IntArray) || DEPH_qparm.size() != nDepth) throw new SimpleException("Invalid z_variable_quality_flag=" + DEPH_qparm); //nGoodDepth is below //temperature var = ncFile.findVariable("temperature"); PrimitiveArray temperature; PrimitiveArray TEMP_qparm; float temperatureFV = temperatureMV; if (var == null) { //nWarnings++; //String2.log("WARNING: No temperature in " + sourceFileName); reasonably common temperature = PrimitiveArray.factory(float.class, nDepth, "" + temperatureMV); TEMP_qparm = PrimitiveArray.factory(int.class, nDepth, "" + qMV); } else { temperature = NcHelper.getPrimitiveArray(var); if (!(temperature instanceof FloatArray) || temperature.size() != nDepth) throw new SimpleException("Invalid temperature=" + temperature); tVarAtts.clear(); NcHelper.getVariableAttributes(var, tVarAtts); temperatureFV = tVarAtts.getFloat("_FillValue"); if (!Float.isNaN(temperatureFV) && temperatureFV != temperatureMV) throw new SimpleException("Invalid temperature _FillValue=" + temperatureFV); //TEMP_qparm var = ncFile.findVariable("temperature_quality_flag"); //TEMP_qparm"); if (var == null) { nWarnings++; String2.log("WARNING: No temperature_quality_flag in " + sourceFileName); TEMP_qparm = PrimitiveArray.factory(int.class, nDepth, "" + qMV); } else { TEMP_qparm = NcHelper.getPrimitiveArray(var); if (!(TEMP_qparm instanceof IntArray) || TEMP_qparm.size() != nDepth) throw new SimpleException("Invalid temperature_quality_flag=" + TEMP_qparm); } } //salinity var = ncFile.findVariable("salinity"); PrimitiveArray salinity; PrimitiveArray PSAL_qparm; float salinityFV = salinityMV; if (var == null) { //String2.log("WARNING: No salinity in " + sourceFileName); //very common salinity = PrimitiveArray.factory(float.class, nDepth, "" + salinityMV); PSAL_qparm = PrimitiveArray.factory(int.class, nDepth, "" + qMV); } else { salinity = NcHelper.getPrimitiveArray(var); if (!(salinity instanceof FloatArray) || salinity.size() != nDepth) throw new SimpleException("Invalid salinity=" + salinity); tVarAtts.clear(); NcHelper.getVariableAttributes(var, tVarAtts); salinityFV = tVarAtts.getFloat("_FillValue"); if (!Float.isNaN(salinityFV) && salinityFV != salinityMV) throw new SimpleException("Invalid salinity _FillValue=" + salinityFV); //PSAL_qparm var = ncFile.findVariable("salinity_quality_flag"); //PSAL_qparm"); if (var == null) { nWarnings++; String2.log("WARNING: No salinity_quality_flag in " + sourceFileName); PSAL_qparm = PrimitiveArray.factory(int.class, nDepth, "" + qMV); } else { PSAL_qparm = NcHelper.getPrimitiveArray(var); if (!(PSAL_qparm instanceof IntArray) || PSAL_qparm.size() != nDepth) throw new SimpleException("Invalid salinity_quality_flag=" + PSAL_qparm); } } //clean the data //(good to do it here so memory usage is low -- table remains as small as possible) //Change "impossible" data to NaN //(from https://www.nodc.noaa.gov/GTSPP/document/qcmans/GTSPP_RT_QC_Manual_20090916.pdf //pg 61 has Table 2.1: Global Impossible Parameter Values). 
BitSet keep = new BitSet(); keep.set(0, nDepth); //all true //find worst impossible depth/temperature/salinity for this station //boolean tImpossibleNanDepth = false; //boolean tImpossibleNanTemperature = false; //boolean tImpossibleNanSalinity = false; float tImpossibleMinDepth = minDepth; float tImpossibleMaxDepth = maxDepth; float tImpossibleMinTemperature = minTemperature; float tImpossibleMaxTemperature = maxTemperature; float tImpossibleMinSalinity = minSalinity; float tImpossibleMaxSalinity = maxSalinity; for (int row = 0; row < nDepth; row++) { //DEPH_qparm int qs = DEPH_qparm.getInt(row); float f = depth.getFloat(row); if (String2.indexOf(okQF, qs) < 0) { nBadDepth++; keep.clear(row); continue; } else if (Float.isNaN(f) || f == depthMV) { //"impossible" depth //tImpossibleNanDepth = true; nBadDepth++; keep.clear(row); continue; } else if (f < minDepth) { tImpossibleMinDepth = Math.min(tImpossibleMinDepth, f); nBadDepth++; keep.clear(row); continue; } else if (f > maxDepth) { tImpossibleMaxDepth = Math.max(tImpossibleMaxDepth, f); nBadDepth++; keep.clear(row); continue; } nGoodDepth++; boolean hasData = false; //temperature qs = TEMP_qparm.getInt(row); f = temperature.getFloat(row); if (String2.indexOf(okQF, qs) < 0) { temperature.setString(row, ""); //so bad value is now NaN nBadTemperature++; } else if (Float.isNaN(f) || f == temperatureMV) { temperature.setString(row, ""); //so missing value is now NaN nBadTemperature++; } else if (f < minTemperature) { //"impossible" water temperature tImpossibleMinTemperature = Math.min(tImpossibleMinTemperature, f); temperature.setString(row, ""); //so impossible value is now NaN nBadTemperature++; } else if (f > maxTemperature) { //"impossible" water temperature tImpossibleMaxTemperature = Math.max(tImpossibleMaxTemperature, f); temperature.setString(row, ""); //so impossible value is now NaN nBadTemperature++; } else { nGoodTemperature++; hasData = true; } //salinity qs = PSAL_qparm.getInt(row); f = salinity.getFloat(row); if (String2.indexOf(okQF, qs) < 0) { salinity.setString(row, ""); //so bad value is now NaN nBadSalinity++; } else if (Float.isNaN(f) || f == salinityMV) { salinity.setString(row, ""); //so missing value is now NaN nBadSalinity++; } else if (f < minSalinity) { //"impossible" salinity tImpossibleMinSalinity = Math.min(tImpossibleMinSalinity, f); salinity.setString(row, ""); //so impossible value is now NaN nBadSalinity++; } else if (f > maxSalinity) { //"impossible" salinity tImpossibleMaxSalinity = Math.max(tImpossibleMaxSalinity, f); salinity.setString(row, ""); //so impossible value is now NaN nBadSalinity++; } else { nGoodSalinity++; hasData = true; } //no valid temperature or salinity data? 
if (!hasData) { keep.clear(row); } } //ensure sizes still correct Test.ensureEqual(depth.size(), nDepth, "depth.size changed!"); Test.ensureEqual(temperature.size(), nDepth, "temperature.size changed!"); Test.ensureEqual(salinity.size(), nDepth, "salinity.size changed!"); //actually remove the bad rows int tnGood = keep.cardinality(); if (testMode && verbose) String2.log( sourceFileName + ": nGoodRows=" + tnGood + " nBadRows=" + (nDepth - tnGood)); nGoodRows += tnGood; nBadRows += nDepth - tnGood; depth.justKeep(keep); temperature.justKeep(keep); salinity.justKeep(keep); nDepth = depth.size(); //impossible //if (tImpossibleNanDepth) // impossibleNanDepth.add(key + " hasNaN=true"); //if (tImpossibleNanTemperature) // impossibleNanTemperature.add(key + " hasNaN=true"); //if (tImpossibleNanSalinity) // impossibleNanSalinity.add(key + " hasNaN=true"); if (tImpossibleMinDepth < minDepth) impossibleMinDepth.add(key + " worst = " + tImpossibleMinDepth); if (tImpossibleMaxDepth > maxDepth) impossibleMaxDepth.add(key + " worst = " + tImpossibleMaxDepth); if (tImpossibleMinTemperature < minTemperature) impossibleMinTemperature.add(key + " worst = " + tImpossibleMinTemperature); if (tImpossibleMaxTemperature > maxTemperature) impossibleMaxTemperature.add(key + " worst = " + tImpossibleMaxTemperature); if (tImpossibleMinSalinity < minSalinity) impossibleMinSalinity.add(key + " worst = " + tImpossibleMinSalinity); if (tImpossibleMaxSalinity > maxSalinity) impossibleMaxSalinity.add(key + " worst = " + tImpossibleMaxSalinity); //which table if (tnGood == 0) continue; int loni = Math2 .roundToInt(Math.floor((Math.min(lon, maxLon - 0.1f) - minLon) / chunkSize)); int lati = Math2 .roundToInt(Math.floor((Math.min(lat, maxLat - 0.1f) - minLat) / chunkSize)); String outTableName = (minLon + loni * chunkSize) + "E_" + (minLat + lati * chunkSize) + "N"; //String2.replaceAll(cruise + "_" + organization + dataType, ' ', '_'); //too many: 3000+/month in 2011 Table tTable = (Table) tableHashMap.get(outTableName); if (tTable == null) { Attributes ncGlobalAtts = new Attributes(); NcHelper.getGlobalAttributes(ncFile, ncGlobalAtts); String tHistory = ncGlobalAtts.getString("history"); tHistory = tHistory != null && tHistory.length() > 0 ? tHistory + "\n" : ""; //make a table for this platform tTable = new Table(); Attributes ga = tTable.globalAttributes(); String ack = "These data were acquired from the US NOAA National Oceanographic Data Center (NODC) on " + today + " from https://www.nodc.noaa.gov/GTSPP/."; ga.add("acknowledgment", ack); ga.add("license", "These data are openly available to the public. 
" + "Please acknowledge the use of these data with:\n" + ack + "\n\n" + "[standard]"); ga.add("history", tHistory + ".tgz files from ftp.nodc.noaa.gov /pub/gtspp/best_nc/ (https://www.nodc.noaa.gov/GTSPP/)\n" + today + " Most recent ingest, clean, and reformat at ERD (bob.simons at noaa.gov)."); ga.add("infoUrl", "https://www.nodc.noaa.gov/GTSPP/"); ga.add("institution", "NOAA NODC"); ga.add("title", "Global Temperature and Salinity Profile Programme (GTSPP) Data"); String attName = "gtspp_ConventionVersion"; String attValue = ncGlobalAtts.getString(attName); if (attValue != null && attValue.length() > 0) ga.add(attName, attValue); attName = "gtspp_program"; attValue = ncGlobalAtts.getString(attName); if (attValue != null && attValue.length() > 0) ga.add(attName, attValue); attName = "gtspp_programVersion"; attValue = ncGlobalAtts.getString(attName); if (attValue != null && attValue.length() > 0) ga.add(attName, attValue); attName = "gtspp_handbook_version"; attValue = ncGlobalAtts.getString(attName); if (attValue != null && attValue.length() > 0) ga.add(attName, attValue); organizationCol = tTable.addColumn(tTable.nColumns(), "org", new StringArray(), new Attributes()); platformCol = tTable.addColumn(tTable.nColumns(), "platform", new StringArray(), new Attributes()); dataTypeCol = tTable.addColumn(tTable.nColumns(), "type", new StringArray(), new Attributes()); cruiseCol = tTable.addColumn(tTable.nColumns(), "cruise", new StringArray(), new Attributes()); stationCol = tTable.addColumn(tTable.nColumns(), "station_id", new IntArray(), new Attributes()); longitudeCol = tTable.addColumn(tTable.nColumns(), "longitude", new FloatArray(), (new Attributes()).add("units", EDV.LON_UNITS)); latitudeCol = tTable.addColumn(tTable.nColumns(), "latitude", new FloatArray(), (new Attributes()).add("units", EDV.LAT_UNITS)); timeCol = tTable.addColumn(tTable.nColumns(), "time", new DoubleArray(), (new Attributes()).add("units", EDV.TIME_UNITS)); depthCol = tTable.addColumn(tTable.nColumns(), "depth", new FloatArray(), (new Attributes()).add("units", "m")); temperatureCol = tTable.addColumn(tTable.nColumns(), "temperature", new FloatArray(), (new Attributes()).add("units", "degree_C")); salinityCol = tTable.addColumn(tTable.nColumns(), "salinity", new FloatArray(), (new Attributes()).add("units", "1e-3")); //PSU changed to 1e-3 with CF std names 25 tableHashMap.put(outTableName, tTable); } //put data in tTable int oNRows = tTable.nRows(); ((StringArray) tTable.getColumn(organizationCol)).addN(nDepth, organization); ((StringArray) tTable.getColumn(platformCol)).addN(nDepth, platform); ((StringArray) tTable.getColumn(dataTypeCol)).addN(nDepth, dataType); ((StringArray) tTable.getColumn(cruiseCol)).addN(nDepth, cruise); ((IntArray) tTable.getColumn(stationCol)).addN(nDepth, station); ((FloatArray) tTable.getColumn(longitudeCol)).addN(nDepth, lon); ((FloatArray) tTable.getColumn(latitudeCol)).addN(nDepth, lat); ((DoubleArray) tTable.getColumn(timeCol)).addN(nDepth, tTime); ((FloatArray) tTable.getColumn(depthCol)).append(depth); ((FloatArray) tTable.getColumn(temperatureCol)).append(temperature); ((FloatArray) tTable.getColumn(salinityCol)).append(salinity); //ensure the table is valid (same size for each column) tTable.ensureValid(); } catch (Throwable t) { nExceptions++; String2.log( "ERROR while processing " + sourceFileName + "\n " + MustBe.throwableToString(t)); } finally { //always close the ncFile if (ncFile != null) { try { ncFile.close(); } catch (Throwable t) { String2.log("ERROR: unable to close " + 
sourceFileName + "\n" + MustBe.getShortErrorMessage(t)); } } } } String2.log("\n time to read all those files = " + Calendar2.elapsedTimeString(System.currentTimeMillis() - fileReadTime)); //end of region loop String2.log("\nIn zip=" + sourceZipName + "\n nExceptions= " + nExceptions + " nWarnings=" + nWarnings + "\n nBadStation= " + nBadStation + " nGoodStation=" + nGoodStation + "\n nBadPos= " + nBadPos + " nGoodPos=" + nGoodPos + "\n nBadTime= " + nBadTime + " nGoodTime=" + nGoodTime + "\n nBadDepth= " + nBadDepth + " nGoodDepth=" + nGoodDepth + "\n nBadTemperature=" + nBadTemperature + " nGoodTemperature=" + nGoodTemperature + "\n nBadSalinity= " + nBadSalinity + " nGoodSalinity=" + nGoodSalinity); totalNGoodStation += nGoodStation; totalNGoodPos += nGoodPos; totalNGoodTime += nGoodTime; totalNGoodDepth += nGoodDepth; totalNGoodTemperature += nGoodTemperature; totalNGoodSalinity += nGoodSalinity; totalNGoodRows += nGoodRows; totalNBadPos += nBadPos; totalNBadTime += nBadTime; totalNBadDepth += nBadDepth; totalNBadTemperature += nBadTemperature; totalNBadSalinity += nBadSalinity; totalNBadRows += nBadRows; totalNWarnings += nWarnings; totalNExceptions += nExceptions; } //end of region loop //save by outTableName boolean filePrinted = false; Object keys[] = tableHashMap.keySet().toArray(); int nKeys = keys.length; String2.log("\n*** saving nFiles=" + nKeys); for (int keyi = 0; keyi < nKeys; keyi++) { String key = keys[keyi].toString(); Table tTable = (Table) tableHashMap.remove(key); if (tTable == null || tTable.nRows() == 0) { String2.log("Unexpected: no table for key=" + key); continue; } //sort by time, station, depth //depth matches the source files: from surface to deepest tTable.sort(new int[] { timeCol, stationCol, depthCol }, new boolean[] { true, true, true }); //is this saving a small lat lon range? double stationStats[] = tTable.getColumn(stationCol).calculateStats(); //double lonStats[] = tTable.getColumn(longitudeCol).calculateStats(); //double latStats[] = tTable.getColumn(latitudeCol).calculateStats(); //nLats++; //double latRange = latStats[PrimitiveArray.STATS_MAX] - latStats[PrimitiveArray.STATS_MIN]; //latSum += latRange; rowsSum += tTable.nRows(); String2.log(" stationRange=" + Math2.roundToInt( stationStats[PrimitiveArray.STATS_MAX] - stationStats[PrimitiveArray.STATS_MIN]) + //" lonRange=" + Math2.roundToInt(lonStats[ PrimitiveArray.STATS_MAX] - lonStats[ PrimitiveArray.STATS_MIN]) + //" latRange=" + Math2.roundToInt(latRange) + " nRows=" + tTable.nRows()); //save it String tName = tDestDir + String2.encodeFileNameSafe(key); /*if (lonStats[PrimitiveArray.STATS_MAX] > 45 && lonStats[PrimitiveArray.STATS_MIN] < -45) { //NO MORE: This happened with 1 file/cruise, // but won't happen now with lon/lat tiles. //crosses dateline (or widely across lon=0)? 
                //  split into 2 files
                Table ttTable = (Table) tTable.clone();
                ttTable.oneStepApplyConstraint(0, "longitude", "<", "0");
                ttTable.saveAsFlatNc(tName + "_W.nc", "row", false);
                double lonStatsW[] = ttTable.getColumn(longitudeCol).calculateStats();
                nLons++;
                double lonRangeW = lonStatsW[PrimitiveArray.STATS_MAX] - lonStatsW[PrimitiveArray.STATS_MIN];
                lonSum += lonRangeW;

                ttTable = (Table) tTable.clone();
                ttTable.oneStepApplyConstraint(0, "longitude", ">=", "0");
                ttTable.saveAsFlatNc(tName + "_E.nc", "row", false);
                double lonStatsE[] = ttTable.getColumn(longitudeCol).calculateStats();
                nLons++;
                double lonRangeE = lonStatsE[PrimitiveArray.STATS_MAX] - lonStatsE[PrimitiveArray.STATS_MIN];
                lonSum += lonRangeE;
                String2.log("  westLonRange=" + Math2.roundToInt(lonRangeW) +
                    "  eastLonRange=" + Math2.roundToInt(lonRangeE));
            } else */ {
                //nLons++;
                nFiles++;

                //create trajectory variable: platform + cruise
                //(see the trajectory id sketch after this listing)
                StringArray pl = (StringArray) tTable.getColumn("platform");
                StringArray cr = (StringArray) tTable.getColumn("cruise");
                StringArray or = (StringArray) tTable.getColumn("org");
                StringArray ty = (StringArray) tTable.getColumn("type");
                StringArray tr = new StringArray();
                int n = pl.size();
                for (int i = 0; i < n; i++) {
                    pl.set(i, String2.whitespacesToSpace(pl.get(i)));
                    cr.set(i, String2.whitespacesToSpace(cr.get(i)));
                    or.set(i, String2.whitespacesToSpace(or.get(i)));
                    ty.set(i, String2.whitespacesToSpace(ty.get(i)));
                    tr.add(or.getString(i) + "_" + ty.getString(i) + "_" +
                           pl.getString(i) + "_" + cr.getString(i));
                }
                tTable.addColumn(0, "trajectory", tr, new Attributes());

                tTable.saveAsFlatNc(tName + ".nc", "row", false); //convertToFakeMissingValues (keep mv's as NaNs)
            }

            //print a file
            if (testMode && !filePrinted) {
                filePrinted = true;
                String2.log(NcHelper.dumpString(tName, true));
            }
        }

        String2.log("\ncumulative nProfiles=" + profilesSum + " nRows=" + rowsSum +
            " mean nRows/file=" + (rowsSum / Math.max(1, nFiles)));
        //if (nLats > 0)
        //    String2.log("cumulative nLats=" + nLats + " meanLatRange=" + (float) (latSum / nLats));
        //if (nLons > 0) {
        //    String2.log("cumulative nLons=" + nLons + " meanLonRange=" + (float) (lonSum / nLons));
        //    String2.log("mean nRows per saved file = " + (rowsSum / nLons));
        //}

        //print the list of impossible values at the end of each year and at the end of the run
        if (month == 12 || (year == lastYear && month == lastMonth)) {
            String2.log("\n*** " + Calendar2.getCurrentISODateTimeStringLocalTZ() +
                " bobConsolidateGtsppTgz finished the chunk ending " + year + "-" + month + "\n" +
                "chunkTime=" + Calendar2.elapsedTimeString(System.currentTimeMillis() - chunkTime));
            chunkTime = System.currentTimeMillis();

            //print impossible statistics
            String2.log("\nCumulative number of stations with:\n" +
                "impossibleNanLon         = " + impossibleNanLon.size() + "\n" +
                "impossibleMinLon         = " + impossibleMinLon.size() + "\n" +
                "impossibleMaxLon         = " + impossibleMaxLon.size() + "\n" +
                "impossibleNanLat         = " + impossibleNanLat.size() + "\n" +
                "impossibleMinLat         = " + impossibleMinLat.size() + "\n" +
                "impossibleMaxLat         = " + impossibleMaxLat.size() + "\n" +
                "impossibleMinDepth       = " + impossibleMinDepth.size() + "\n" +
                "impossibleMaxDepth       = " + impossibleMaxDepth.size() + "\n" +
                //"impossibleLatLon       = " + impossibleLatLon.size() + "\n" +
                "impossibleMinTemperature = " + impossibleMinTemperature.size() + "\n" +
                "impossibleMaxTemperature = " + impossibleMaxTemperature.size() + "\n" +
                "impossibleMinSalinity    = " + impossibleMinSalinity.size() + "\n" +
                "impossibleMaxSalinity    = " + impossibleMaxSalinity.size() + "\n");

            //lon
            String2.log("\n*** " + impossibleNanLon.size() +
                " stations had invalid lon" +
                " and good pos quality flags (" + okQFCsv + ").");
            impossibleNanLon.sortIgnoreCase();
            String2.log(impossibleNanLon.toNewlineString());

            String2.log("\n*** " + impossibleMinLon.size() + " stations had lon<" + minLon +
                " and good pos quality flags (" + okQFCsv + ").");
            impossibleMinLon.sortIgnoreCase();
            String2.log(impossibleMinLon.toNewlineString());

            String2.log("\n*** " + impossibleMaxLon.size() + " stations had lon>" + maxLon +
                " and good pos quality flags (" + okQFCsv + ").");
            impossibleMaxLon.sortIgnoreCase();
            String2.log(impossibleMaxLon.toNewlineString());

            //lat
            String2.log("\n*** " + impossibleNanLat.size() + " stations had invalid lat" +
                " and good pos quality flags (" + okQFCsv + ").");
            impossibleNanLat.sortIgnoreCase();
            String2.log(impossibleNanLat.toNewlineString());

            String2.log("\n*** " + impossibleMinLat.size() + " stations had lat<" + minLat +
                " and good pos quality flags (" + okQFCsv + ").");
            impossibleMinLat.sortIgnoreCase();
            String2.log(impossibleMinLat.toNewlineString());

            String2.log("\n*** " + impossibleMaxLat.size() + " stations had lat>" + maxLat +
                " and good pos quality flags (" + okQFCsv + ").");
            impossibleMaxLat.sortIgnoreCase();
            String2.log(impossibleMaxLat.toNewlineString());

            //depth
            String2.log("\n*** " + impossibleMinDepth.size() + " stations had depth<" + minDepth +
                " and good depth quality flags (" + okQFCsv + ").");
            impossibleMinDepth.sortIgnoreCase();
            String2.log(impossibleMinDepth.toNewlineString());

            String2.log("\n*** " + impossibleMaxDepth.size() + " stations had depth>" + maxDepth +
                " and good depth quality flags (" + okQFCsv + ").");
            impossibleMaxDepth.sortIgnoreCase();
            String2.log(impossibleMaxDepth.toNewlineString());

            //sa = impossibleLatLon.toArray();
            //Arrays.sort(sa);
            //String2.log("\n*** " + sa.length + " stations had impossible latitude or longitude values" +
            //    " and good q_pos quality flags.");
            //String2.log(String2.toNewlineString(sa));

            //temperature
            String2.log("\n*** " + impossibleMinTemperature.size() + " stations had temperature<" + minTemperature +
                " and good temperature quality flags (" + okQFCsv + ").");
            impossibleMinTemperature.sortIgnoreCase();
            String2.log(impossibleMinTemperature.toNewlineString());

            String2.log("\n*** " + impossibleMaxTemperature.size() + " stations had temperature>" + maxTemperature +
                " and good temperature quality flags (" + okQFCsv + ").");
            impossibleMaxTemperature.sortIgnoreCase();
            String2.log(impossibleMaxTemperature.toNewlineString());

            //salinity
            String2.log("\n*** " + impossibleMinSalinity.size() + " stations had salinity<" + minSalinity +
                " and good salinity quality flags (" + okQFCsv + ").");
            impossibleMinSalinity.sortIgnoreCase();
            String2.log(impossibleMinSalinity.toNewlineString());

            String2.log("\n*** " + impossibleMaxSalinity.size() + " stations had salinity>" + maxSalinity +
                " and good salinity quality flags (" + okQFCsv + ").");
            impossibleMaxSalinity.sortIgnoreCase();
            String2.log(impossibleMaxSalinity.toNewlineString());
        }

        //are we done?
        if (year == lastYear && month == lastMonth)
            break;

        //increment the month (see the month-rollover sketch after this listing)
        month++;
        if (month == 13) {
            year++;
            month = 1;
        }

    } //end of month/year loop

    String2.log("\n*** bobConsolidateGtspp completely finished " +
        firstYear + "-" + firstMonth + " through " + lastYear + "-" + lastMonth);

    String2.log("\n***" +
        "\ntotalNExceptions=    " + totalNExceptions     + "        totalNWarnings=       " + totalNWarnings +
        "\ntotalNBadStation=    " + totalNBadStation     + "        totalNGoodStation=    " + totalNGoodStation +
        "\ntotalNBadPos=        " + totalNBadPos         + "        totalNGoodPos=        " + totalNGoodPos +
        "\ntotalNBadTime=       " + totalNBadTime        + "        totalNGoodTime=       " + totalNGoodTime +
        "\ntotalNBadDepth=      " + totalNBadDepth       + "        totalNGoodDepth=      " + totalNGoodDepth +
        "\ntotalNBadTemperature=" + totalNBadTemperature + "        totalNGoodTemperature=" + totalNGoodTemperature +
        "\ntotalNBadSalinity=   " + totalNBadSalinity    + "        totalNGoodSalinity=   " + totalNGoodSalinity +
        "\ntotalNBadRows=       " + totalNBadRows        + "        totalNGoodRows=       " + totalNGoodRows +
        "\nlogFile=F:/data/gtspp/log.txt" +
        "\n\n*** all finished  time=" +
        Calendar2.elapsedTimeString(System.currentTimeMillis() - elapsedTime));
    String2.returnLoggingToSystemOut();
}
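
A note on the save loop above: it snapshots the map's keys into an array and then calls tableHashMap.remove(key) for each one, so each Table is dropped from the map as soon as it has been saved (plausibly so the large table can be garbage collected right away; that motivation is an inference, not stated in the source). A minimal, self-contained sketch of that drain pattern, with StringBuilder standing in for ERDDAP's Table and invented keys:

import java.util.HashMap;
import java.util.Map;

public class DrainMapSketch {
    public static void main(String[] args) {
        Map<String, StringBuilder> tableHashMap = new HashMap<>();
        tableHashMap.put("1990-01_120W_30N", new StringBuilder("...rows..."));
        tableHashMap.put("1990-01_130W_30N", new StringBuilder("...rows..."));

        //Snapshot the keys first (so nothing iterates the live map),
        //then remove each entry as it is "saved" so the map drops its
        //reference to the big value object.
        Object keys[] = tableHashMap.keySet().toArray();
        for (int keyi = 0; keyi < keys.length; keyi++) {
            String key = keys[keyi].toString();
            StringBuilder tTable = tableHashMap.remove(key);
            if (tTable == null)
                continue; //matches the defensive null check in the listing
            System.out.println("saving " + key + " nChars=" + tTable.length());
        }
    }
}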
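
The tTable.sort(new int[]{timeCol, stationCol, depthCol}, new boolean[]{true, true, true}) call orders rows by time, then station, then depth, all ascending, so each profile runs from the surface down. A plain-JDK (Java 16+) sketch of the same three-key ordering; the Row record here is a made-up stand-in for one table row:

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

public class ProfileSortSketch {
    record Row(double time, int stationId, float depth) {}

    public static void main(String[] args) {
        List<Row> rows = new ArrayList<>(List.of(
            new Row(2.0, 7, 10f),
            new Row(1.0, 7, 5f),
            new Row(1.0, 7, 0f)));

        //All three keys ascending, like new boolean[]{true, true, true}:
        //within a time+station group, depth runs surface -> deepest.
        rows.sort(Comparator.comparingDouble(Row::time)
            .thenComparingInt(Row::stationId)
            .thenComparingDouble(Row::depth));

        rows.forEach(System.out::println);
    }
}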
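
The trajectory column is built by collapsing runs of whitespace in the org, type, platform, and cruise values and joining them with underscores. A standalone sketch of that construction; the whitespacesToSpace helper is an assumed regex-based equivalent of ERDDAP's String2.whitespacesToSpace, and the sample values are invented:

public class TrajectoryIdSketch {
    //Collapse each run of whitespace to a single space, like
    //String2.whitespacesToSpace in the listing above (assumed behavior).
    static String whitespacesToSpace(String s) {
        return s.trim().replaceAll("\\s+", " ");
    }

    public static void main(String[] args) {
        String org = "ME", type = "BA", platform = "33TT\t GYRE", cruise = "22531";
        String trajectory = whitespacesToSpace(org) + "_" + whitespacesToSpace(type) + "_" +
            whitespacesToSpace(platform) + "_" + whitespacesToSpace(cruise);
        System.out.println(trajectory); //prints ME_BA_33TT GYRE_22531
    }
}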
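
Finally, the outer loop steps through year-month chunks with an explicit rollover at month 13 rather than a date library. A minimal runnable sketch of that chunking logic; the bounds are illustrative:

public class MonthLoopSketch {
    public static void main(String[] args) {
        int firstYear = 1990, firstMonth = 11; //illustrative bounds
        int lastYear  = 1991, lastMonth  = 2;
        int year = firstYear, month = firstMonth;
        while (true) {
            System.out.println("process " + year + "-" + month); //one month's .tgz files

            //are we done?
            if (year == lastYear && month == lastMonth)
                break;

            //increment the month
            month++;
            if (month == 13) {
                year++;
                month = 1;
            }
        }
    }
}

This prints 1990-11, 1990-12, 1991-1, 1991-2: the same inclusive first-through-last traversal the listing uses, with the done-check placed before the increment so the last month is processed exactly once.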