List of usage examples for java.util.BitSet.cardinality(), collected from open-source projects.

public int cardinality()
Returns the number of bits set to true in this BitSet.
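Before the project examples, a minimal self-contained illustration of the method (the class name CardinalityDemo is ours, not from any of the sources below):

import java.util.BitSet;

public class CardinalityDemo {
    public static void main(String[] args) {
        BitSet bits = new BitSet(8);
        bits.set(1);
        bits.set(4);
        bits.set(6);
        System.out.println(bits.cardinality()); // 3: three bits are set to true
        bits.clear(4);
        System.out.println(bits.cardinality()); // 2
    }
}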
From source file: uk.ac.ebi.orchem.search.SimilaritySearch.java

/**
 * Performs a similarity search between a query molecule and the orchem fingerprint table.
 *
 * @param queryFp fingerprint of the query molecule
 * @param _cutOff tanimoto score below which to stop searching
 * @param _topN top N results after which to stop searching
 * @param debugYN Y or N to debug output back
 * @param idsOnlyYN Y or N to indicate to just return IDs of results (faster)
 * @param extraWhereClause option to include an extra SQL where clause referring to the base compound table
 * @return array of {@link uk.ac.ebi.orchem.bean.OrChemCompound compounds}
 * @throws Exception
 */
private static oracle.sql.ARRAY search(BitSet queryFp, Float _cutOff, Integer _topN, String debugYN,
        String idsOnlyYN, String extraWhereClause) throws Exception {
    /*
     * The comment block below describes the search algorithm. From:
     * "Bounds and Algorithms for Fast Exact Searches of Chemical Fingerprints
     * in Linear and Sub-Linear Time"
     * S. Joshua Swamidass and Pierre Baldi
     * http://dx.doi.org/10.1021/ci600358f
     *
     * Top K Hits
     * ----------
     * We can search for the top K hits by starting from the maximum (where A=B), and exploring
     * discrete possible values of B right and left of the maximum. More precisely, for binary
     * fingerprints, we first index the molecules in the database by their fingerprint "bit count"
     * to enable efficient referencing of a particular bit count bin. Next, with respect to a
     * particular query, we calculate the bound on the similarity for every bit count in the
     * database. Then we sort these bit counts by their associated bound and iterate over the
     * molecules in the database, in order of decreasing bound. As we iterate, we calculate the
     * similarity between the query and the database molecule and use a heap to efficiently track
     * the top hits. The algorithm terminates when "the lowest similarity value in the heap is
     * greater than the bound associated with the current database bin".
     *
     * Algorithm 1 Top K Search
     * Require: database of fingerprints binned by bit count Bs
     * Ensure: hits contains top K hits which satisfy SIMILARITY(fp) > T
     *  1: hits <- MINHEAP()
     *  2: bounds <- LIST()
     *  3: for all B in database do                 // iterate over bins
     *  4:     tuple <- TUPLE(BOUND(A,B), B)
     *  5:     LISTAPPEND(bounds, tuple)
     *  6: end for
     *  7: QUICKSORT(bounds)                        // NOTE: the length of bounds is constant
     *  8: for all bound, B in bounds do            // iterate in order of decreasing bound
     *  9:     if bound < T then
     * 10:         break                            // threshold stopping condition
     * 11:     end if
     * 12:     if K = HEAPSIZE(hits) and bound < MINSIMILARITY(hits) then
     * 13:         break                            // top-K stopping condition
     * 14:     end if
     * 15:     for all fp in database[B] do
     * 16:         S = SIMILARITY(fp)
     * 17:         tuple <- TUPLE(S, fp)
     * 18:         if S < T then
     * 19:             continue                     // ignore this fp and continue to next
     * 20:         else if LENGTH(hits) < K then
     * 21:             HEAPPUSH(hits, tuple)
     * 22:         else if S > MINSIMILARITY(hits) then
     * 23:             HEAPPOPMIN(hits)
     * 24:             HEAPPUSH(hits, tuple)
     * 25:         end if
     * 26:     end for
     * 27: end for
     * 28: return hits
     */
    boolean debugging = false;
    if (debugYN.toLowerCase().equals("y"))
        debugging = true;

    debug("started", debugging);

    /**********************************************************************
     * Similarity search algorithm section                                *
     **********************************************************************/
    Comparator heapComparator = new SimHeapElementTanimComparator();
    PriorityBuffer heap = null;
    OracleConnection conn = null;
    PreparedStatement pstmtFp = null;
    PreparedStatement pstmLookup = null;
    String query = " select bit_count, id, fp from orchem_fingprint_simsearch s where bit_count = ? ";
    float cutOff = _cutOff.floatValue();
    int topN = -1;
    if (_topN == null) {
        debug("No topN breakout specified.. searching until lower bound reached", debugging);
    } else {
        topN = _topN.intValue();
        debug("topN is " + topN + ", result set size limited.", debugging);
    }

    try {
        conn = (OracleConnection) new OracleDriver().defaultConnection();

        String compoundTableName = OrChemParameters.getParameterValue(OrChemParameters.COMPOUND_TABLE, conn);
        String compoundTablePkColumn = OrChemParameters.getParameterValue(OrChemParameters.COMPOUND_PK, conn);
        String compoundTableMolfileColumn = OrChemParameters.getParameterValue(OrChemParameters.COMPOUND_MOL, conn);

        if (extraWhereClause != null) {
            query = " select s.bit_count, s.id, s.fp from " + " orchem_fingprint_simsearch s , "
                    + compoundTableName + " c " + " where s.bit_count = ? " + " and s.id = c."
                    + compoundTablePkColumn + " " + " and " + extraWhereClause;
            debug("QUERY is " + query, debugging);
        }

        float queryBitCount = queryFp.cardinality();
        byte[] queryBytes = Utils.toByteArray(queryFp, extFpSize);
        int queryByteArrLen = queryBytes.length;

        float lowBucketNum = queryBitCount - 1;
        float highBucketNum = queryBitCount + 1;
        float currBucketNum = queryBitCount;

        pstmtFp = conn.prepareStatement(query);
        pstmtFp.setFetchSize(250);

        ResultSet resFp = null;
        boolean done = false;
        byte[] dbByteArray = null;
        float tanimotoCoeff = 0f;
        heap = new PriorityBuffer(true, heapComparator);
        int bucksSearched = 0;
        int loopCount = 0;

        while (!done) {
            debug("bucket is " + currBucketNum, debugging);
            loopCount++;
            pstmtFp.setFloat(1, currBucketNum);
            bucksSearched++;
            resFp = pstmtFp.executeQuery();
            float bound = 0f;
            if (currBucketNum < queryBitCount)
                bound = currBucketNum / queryBitCount;
            else
                bound = queryBitCount / currBucketNum;

            /* Algorithm step 9..11
               Here we can break out because the tanimoto score is becoming too low */
            if (bound < cutOff) {
                debug("bound < cutOff, done", debugging);
                done = true;
            }

            if (!done) {
                // Algorithm 15-26
                while (resFp.next()) {
                    dbByteArray = resFp.getBytes("fp");
                    tanimotoCoeff = calcTanimoto(queryBytes, queryByteArrLen, dbByteArray, queryBitCount,
                            currBucketNum);
                    if (tanimotoCoeff >= cutOff) {
                        SimHeapElement elm = new SimHeapElement();
                        elm.setID(resFp.getString("id"));
                        elm.setTanimotoCoeff(new Float(tanimotoCoeff));
                        if (heap.size() < topN || topN == -1) {
                            heap.add(elm);
                            debug("add elem " + elm.getID(), debugging);
                        } else if (tanimotoCoeff > ((SimHeapElement) (heap.get())).getTanimotoCoeff()
                                .floatValue()) {
                            heap.remove();
                            heap.add(elm);
                            debug("remove + add elem " + elm.getID(), debugging);
                        }
                    }
                }
                resFp.close();

                /* Algorithm 12-14:
                 * When top N hits is reached, and the lowest score of the
                 * hits is greater than the current bucket bound, stop.
                 * If not, the next bucket may contain a better score, so go on.
                 */
                if (topN != -1 && heap.size() >= topN
                        && ((SimHeapElement) (heap.get())).getTanimotoCoeff().floatValue() > bound) {
                    done = true;
                    debug("topN reached, done", debugging);
                } else {
                    // calculate new currBucket
                    float up = queryBitCount / highBucketNum;
                    float down = lowBucketNum / queryBitCount;
                    if (up > down) {
                        currBucketNum = highBucketNum;
                        highBucketNum++;
                    } else {
                        currBucketNum = lowBucketNum;
                        lowBucketNum--;
                    }
                    if (lowBucketNum < 1 && highBucketNum > extFpSize)
                        done = true;
                }
            }
        }
        debug("searched bit_count buckets: " + loopCount, debugging);

        /********************************************************************
         * Search completed.                                                *
         *                                                                  *
         * Next section is just looking up the compounds by ID and         *
         * returning the results, sorted by Tanimoto coefficient           *
         *******************************************************************/
        String lookupCompoundQuery = " select " + compoundTableMolfileColumn + " from " + " "
                + compoundTableName + " where " + " " + compoundTablePkColumn + " =?";
        pstmLookup = conn.prepareStatement(lookupCompoundQuery);
        List compounds = new ArrayList();

        while (heap.size() != 0) {
            SimHeapElement bElm = (SimHeapElement) heap.remove();
            if (idsOnlyYN.equals("N")) {
                // return structure to user
                pstmLookup.setString(1, bElm.getID());
                ResultSet resLookup = pstmLookup.executeQuery();
                if (resLookup.next()) {
                    OrChemCompound c = new OrChemCompound();
                    c.setId(bElm.getID());
                    c.setScore(bElm.getTanimotoCoeff().floatValue());
                    c.setMolFileClob(resLookup.getClob(compoundTableMolfileColumn));
                    compounds.add(c);
                }
                resLookup.close();
            } else {
                // only return ID and score to user
                OrChemCompound c = new OrChemCompound();
                c.setId(bElm.getID());
                c.setScore(bElm.getTanimotoCoeff().floatValue());
                compounds.add(c);
            }
        }
        pstmLookup.close();

        long befSort = System.currentTimeMillis();
        Collections.sort(compounds, new OrChemCompoundTanimComparator());
        debug("sorting time (ms) " + (System.currentTimeMillis() - befSort), debugging);

        OrChemCompound[] output = new OrChemCompound[compounds.size()];
        for (int i = 0; i < compounds.size(); i++) {
            output[i] = (OrChemCompound) (compounds.get(i));
        }
        ArrayDescriptor arrayDescriptor = ArrayDescriptor.createDescriptor("ORCHEM_COMPOUND_LIST", conn);
        debug("#compounds in result list : " + compounds.size(), debugging);
        debug("ended", debugging);
        return new ARRAY(arrayDescriptor, conn, output);

    } catch (Exception ex) {
        ex.printStackTrace();
        throw (ex);
    } finally {
        if (pstmLookup != null)
            pstmLookup.close();
        if (pstmtFp != null)
            pstmtFp.close();
        if (conn != null)
            conn.close();
    }
}
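The bucket bound in the loop above follows from the fact that the intersection count |A AND B| can be at most min(|A|, |B|), so the Tanimoto coefficient never exceeds min(|A|, |B|) / max(|A|, |B|) for fingerprints with those bit counts. The OrChem code computes the actual coefficient from byte arrays in calcTanimoto; the following is a minimal sketch of the same coefficient computed directly on BitSets via cardinality() (TanimotoSketch and tanimoto are hypothetical names, not part of OrChem):

import java.util.BitSet;

class TanimotoSketch {
    // Tanimoto coefficient of two binary fingerprints:
    //   T(A, B) = |A AND B| / (|A| + |B| - |A AND B|)
    // cardinality() supplies every population count involved.
    static float tanimoto(BitSet a, BitSet b) {
        BitSet intersection = (BitSet) a.clone();
        intersection.and(b);                           // A AND B
        int common = intersection.cardinality();       // |A AND B|
        int union = a.cardinality() + b.cardinality() - common;
        return union == 0 ? 0f : (float) common / union;
    }
}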
From source file: org.apache.openjpa.kernel.BrokerImpl.java

public boolean isCached(List<Object> oids) {
    BitSet loaded = new BitSet(oids.size());
    // check L1 cache first
    for (int i = 0; i < oids.size(); i++) {
        Object oid = oids.get(i);
        if (_cache.getById(oid, false) != null) {
            loaded.set(i);
        }
    }
    // every requested oid was found in the L1 cache
    if (loaded.cardinality() == oids.size()) {
        return true;
    }
    return _store.isCached(oids, loaded);
}
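The idiom here is one bit per list index: set bit i when element i is resolved, then compare cardinality() against the list size to test completeness. A generalized sketch of that pattern (CompletenessCheck and allResolved are hypothetical names; the Predicate stands in for the real lookup, _cache.getById above):

import java.util.BitSet;
import java.util.List;
import java.util.function.Predicate;

class CompletenessCheck {
    // Returns true when every element satisfies the lookup: the scan is
    // complete exactly when cardinality() equals the list size.
    static <T> boolean allResolved(List<T> items, Predicate<T> resolved) {
        BitSet found = new BitSet(items.size());
        for (int i = 0; i < items.size(); i++) {
            if (resolved.test(items.get(i))) {
                found.set(i);
            }
        }
        return found.cardinality() == items.size();
    }
}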
From source file: edu.uci.ics.asterix.optimizer.rules.am.BTreeAccessMethod.java

private ILogicalOperator createSecondaryToPrimaryPlan(Mutable<ILogicalOperator> topOpRef,
        Mutable<ILogicalExpression> conditionRef, OptimizableOperatorSubTree indexSubTree,
        OptimizableOperatorSubTree probeSubTree, Index chosenIndex, AccessMethodAnalysisContext analysisCtx,
        boolean retainInput, boolean retainNull, boolean requiresBroadcast, IOptimizationContext context)
        throws AlgebricksException {
    Dataset dataset = indexSubTree.dataset;
    ARecordType recordType = indexSubTree.recordType;
    // we made sure indexSubTree has datasource scan
    AbstractDataSourceOperator dataSourceOp = (AbstractDataSourceOperator) indexSubTree.dataSourceRef.getValue();
    List<Pair<Integer, Integer>> exprAndVarList = analysisCtx.indexExprsAndVars.get(chosenIndex);
    List<IOptimizableFuncExpr> matchedFuncExprs = analysisCtx.matchedFuncExprs;
    int numSecondaryKeys = analysisCtx.indexNumMatchedKeys.get(chosenIndex);
    // List of function expressions that will be replaced by the secondary-index search.
    // These func exprs will be removed from the select condition at the very end of this method.
    Set<ILogicalExpression> replacedFuncExprs = new HashSet<ILogicalExpression>();
    // Info on high and low keys for the BTree search predicate.
    ILogicalExpression[] lowKeyExprs = new ILogicalExpression[numSecondaryKeys];
    ILogicalExpression[] highKeyExprs = new ILogicalExpression[numSecondaryKeys];
    LimitType[] lowKeyLimits = new LimitType[numSecondaryKeys];
    LimitType[] highKeyLimits = new LimitType[numSecondaryKeys];
    boolean[] lowKeyInclusive = new boolean[numSecondaryKeys];
    boolean[] highKeyInclusive = new boolean[numSecondaryKeys];
    // TODO: For now we don't do any sophisticated analysis of the func exprs to come up with
    // "the best" range predicate. If we can't figure out how to integrate a certain funcExpr
    // into the current predicate, we just bail by setting this flag.
    boolean couldntFigureOut = false;
    boolean doneWithExprs = false;
    boolean isEqCondition = false;
    // TODO: For now don't consider prefix searches.
    BitSet setLowKeys = new BitSet(numSecondaryKeys);
    BitSet setHighKeys = new BitSet(numSecondaryKeys);
    // Go through the func exprs listed as optimizable by the chosen index,
    // and formulate a range predicate on the secondary-index keys.
    // checks whether a type casting happened from a real (FLOAT, DOUBLE) value to an INT value
    // since we have a round issues when dealing with LT(<) OR GT(>) operator.
    boolean realTypeConvertedToIntegerType = false;
    for (Pair<Integer, Integer> exprIndex : exprAndVarList) {
        // Position of the field of matchedFuncExprs.get(exprIndex) in the chosen index's indexed exprs.
        IOptimizableFuncExpr optFuncExpr = matchedFuncExprs.get(exprIndex.first);
        int keyPos = indexOf(optFuncExpr.getFieldName(0), chosenIndex.getKeyFieldNames());
        if (keyPos < 0) {
            if (optFuncExpr.getNumLogicalVars() > 1) {
                // If we are optimizing a join, the matching field may be the second field name.
                keyPos = indexOf(optFuncExpr.getFieldName(1), chosenIndex.getKeyFieldNames());
            }
        }
        if (keyPos < 0) {
            throw new AlgebricksException(
                    "Could not match optimizable function expression to any index field name.");
        }
        Pair<ILogicalExpression, Boolean> returnedSearchKeyExpr = AccessMethodUtils
                .createSearchKeyExpr(optFuncExpr, indexSubTree, probeSubTree);
        ILogicalExpression searchKeyExpr = returnedSearchKeyExpr.first;
        realTypeConvertedToIntegerType = returnedSearchKeyExpr.second;
        LimitType limit = getLimitType(optFuncExpr, probeSubTree);

        // If a DOUBLE or FLOAT constant is converted to an INT type value,
        // we need to check a corner case where two real values are located between an INT value.
        // For example, for the following query,
        //
        // for $emp in dataset empDataset
        // where $emp.age > double("2.3") and $emp.age < double("3.3")
        // return $emp.id;
        //
        // It should generate a result if there is a tuple that satisfies the condition, which is 3,
        // however, it does not generate the desired result since finding candidates
        // fail after truncating the fraction part (there is no INT whose value is greater than 2
        // and less than 3.)
        //
        // Therefore, we convert LT(<) to LE(<=) and GT(>) to GE(>=) to find candidates.
        // This does not change the result of an actual comparison since this conversion is only
        // applied for finding candidates from an index.
        if (realTypeConvertedToIntegerType) {
            if (limit == LimitType.HIGH_EXCLUSIVE) {
                limit = LimitType.HIGH_INCLUSIVE;
            } else if (limit == LimitType.LOW_EXCLUSIVE) {
                limit = LimitType.LOW_INCLUSIVE;
            }
        }

        switch (limit) {
        case EQUAL: {
            if (lowKeyLimits[keyPos] == null && highKeyLimits[keyPos] == null) {
                lowKeyLimits[keyPos] = highKeyLimits[keyPos] = limit;
                lowKeyInclusive[keyPos] = highKeyInclusive[keyPos] = true;
                lowKeyExprs[keyPos] = highKeyExprs[keyPos] = searchKeyExpr;
                setLowKeys.set(keyPos);
                setHighKeys.set(keyPos);
                isEqCondition = true;
            } else {
                // Has already been set to the identical values. When optimizing join we may
                // encounter the same optimizable expression twice
                // (once from analyzing each side of the join)
                if (lowKeyLimits[keyPos] == limit && lowKeyInclusive[keyPos] == true
                        && lowKeyExprs[keyPos].equals(searchKeyExpr) && highKeyLimits[keyPos] == limit
                        && highKeyInclusive[keyPos] == true && highKeyExprs[keyPos].equals(searchKeyExpr)) {
                    isEqCondition = true;
                    break;
                }
                couldntFigureOut = true;
            }
            // TODO: For now don't consider prefix searches.
            // If high and low keys are set, we exit for now.
            if (setLowKeys.cardinality() == numSecondaryKeys
                    && setHighKeys.cardinality() == numSecondaryKeys) {
                doneWithExprs = true;
            }
            break;
        }
        case HIGH_EXCLUSIVE: {
            if (highKeyLimits[keyPos] == null || (highKeyLimits[keyPos] != null && highKeyInclusive[keyPos])) {
                highKeyLimits[keyPos] = limit;
                highKeyExprs[keyPos] = searchKeyExpr;
                highKeyInclusive[keyPos] = false;
            } else {
                // Has already been set to the identical values. When optimizing join we may
                // encounter the same optimizable expression twice
                // (once from analyzing each side of the join)
                if (highKeyLimits[keyPos] == limit && highKeyInclusive[keyPos] == false
                        && highKeyExprs[keyPos].equals(searchKeyExpr)) {
                    break;
                }
                couldntFigureOut = true;
                doneWithExprs = true;
            }
            break;
        }
        case HIGH_INCLUSIVE: {
            if (highKeyLimits[keyPos] == null) {
                highKeyLimits[keyPos] = limit;
                highKeyExprs[keyPos] = searchKeyExpr;
                highKeyInclusive[keyPos] = true;
            } else {
                // Has already been set to the identical values. When optimizing join we may
                // encounter the same optimizable expression twice
                // (once from analyzing each side of the join)
                if (highKeyLimits[keyPos] == limit && highKeyInclusive[keyPos] == true
                        && highKeyExprs[keyPos].equals(searchKeyExpr)) {
                    break;
                }
                couldntFigureOut = true;
                doneWithExprs = true;
            }
            break;
        }
        case LOW_EXCLUSIVE: {
            if (lowKeyLimits[keyPos] == null || (lowKeyLimits[keyPos] != null && lowKeyInclusive[keyPos])) {
                lowKeyLimits[keyPos] = limit;
                lowKeyExprs[keyPos] = searchKeyExpr;
                lowKeyInclusive[keyPos] = false;
            } else {
                // Has already been set to the identical values. When optimizing join we may
                // encounter the same optimizable expression twice
                // (once from analyzing each side of the join)
                if (lowKeyLimits[keyPos] == limit && lowKeyInclusive[keyPos] == false
                        && lowKeyExprs[keyPos].equals(searchKeyExpr)) {
                    break;
                }
                couldntFigureOut = true;
                doneWithExprs = true;
            }
            break;
        }
        case LOW_INCLUSIVE: {
            if (lowKeyLimits[keyPos] == null) {
                lowKeyLimits[keyPos] = limit;
                lowKeyExprs[keyPos] = searchKeyExpr;
                lowKeyInclusive[keyPos] = true;
            } else {
                // Has already been set to the identical values. When optimizing join we may
                // encounter the same optimizable expression twice
                // (once from analyzing each side of the join)
                if (lowKeyLimits[keyPos] == limit && lowKeyInclusive[keyPos] == true
                        && lowKeyExprs[keyPos].equals(searchKeyExpr)) {
                    break;
                }
                couldntFigureOut = true;
                doneWithExprs = true;
            }
            break;
        }
        default: {
            throw new IllegalStateException();
        }
        }
        if (!couldntFigureOut) {
            // Remember to remove this funcExpr later.
            replacedFuncExprs.add(matchedFuncExprs.get(exprIndex.first).getFuncExpr());
        }
        if (doneWithExprs) {
            break;
        }
    }
    if (couldntFigureOut) {
        return null;
    }

    // If the select condition contains mixed open/closed intervals on multiple keys, then we make
    // all intervals closed to obtain a superset of answers and leave the original selection in place.
    boolean primaryIndexPostProccessingIsNeeded = false;
    for (int i = 1; i < numSecondaryKeys; ++i) {
        if (lowKeyInclusive[i] != lowKeyInclusive[0]) {
            Arrays.fill(lowKeyInclusive, true);
            primaryIndexPostProccessingIsNeeded = true;
            break;
        }
    }
    for (int i = 1; i < numSecondaryKeys; ++i) {
        if (highKeyInclusive[i] != highKeyInclusive[0]) {
            Arrays.fill(highKeyInclusive, true);
            primaryIndexPostProccessingIsNeeded = true;
            break;
        }
    }

    // determine cases when prefix search could be applied
    for (int i = 1; i < lowKeyExprs.length; i++) {
        if (lowKeyLimits[0] == null && lowKeyLimits[i] != null
                || lowKeyLimits[0] != null && lowKeyLimits[i] == null
                || highKeyLimits[0] == null && highKeyLimits[i] != null
                || highKeyLimits[0] != null && highKeyLimits[i] == null) {
            numSecondaryKeys--;
            primaryIndexPostProccessingIsNeeded = true;
        }
    }
    if (lowKeyLimits[0] == null) {
        lowKeyInclusive[0] = true;
    }
    if (highKeyLimits[0] == null) {
        highKeyInclusive[0] = true;
    }

    // Here we generate vars and funcs for assigning the secondary-index keys to be fed into the
    // secondary-index search.
    // List of variables for the assign.
    ArrayList<LogicalVariable> keyVarList = new ArrayList<LogicalVariable>();
    // List of variables and expressions for the assign.
    ArrayList<LogicalVariable> assignKeyVarList = new ArrayList<LogicalVariable>();
    ArrayList<Mutable<ILogicalExpression>> assignKeyExprList = new ArrayList<Mutable<ILogicalExpression>>();
    int numLowKeys = createKeyVarsAndExprs(numSecondaryKeys, lowKeyLimits, lowKeyExprs, assignKeyVarList,
            assignKeyExprList, keyVarList, context);
    int numHighKeys = createKeyVarsAndExprs(numSecondaryKeys, highKeyLimits, highKeyExprs, assignKeyVarList,
            assignKeyExprList, keyVarList, context);

    BTreeJobGenParams jobGenParams = new BTreeJobGenParams(chosenIndex.getIndexName(), IndexType.BTREE,
            dataset.getDataverseName(), dataset.getDatasetName(), retainInput, retainNull, requiresBroadcast);
    jobGenParams.setLowKeyInclusive(lowKeyInclusive[0]);
    jobGenParams.setHighKeyInclusive(highKeyInclusive[0]);
    jobGenParams.setIsEqCondition(isEqCondition);
    jobGenParams.setLowKeyVarList(keyVarList, 0, numLowKeys);
    jobGenParams.setHighKeyVarList(keyVarList, numLowKeys, numHighKeys);

    ILogicalOperator inputOp = null;
    if (!assignKeyVarList.isEmpty()) {
        // Assign operator that sets the constant secondary-index search-key fields if necessary.
        AssignOperator assignConstantSearchKeys = new AssignOperator(assignKeyVarList, assignKeyExprList);
        // Input to this assign is the EmptyTupleSource (which the dataSourceScan also must have had as input).
        assignConstantSearchKeys.getInputs().add(dataSourceOp.getInputs().get(0));
        assignConstantSearchKeys.setExecutionMode(dataSourceOp.getExecutionMode());
        inputOp = assignConstantSearchKeys;
    } else {
        // All index search keys are variables.
        inputOp = probeSubTree.root;
    }

    UnnestMapOperator secondaryIndexUnnestOp = AccessMethodUtils.createSecondaryIndexUnnestMap(dataset,
            recordType, chosenIndex, inputOp, jobGenParams, context, false, retainInput);

    // Generate the rest of the upstream plan which feeds the search results into the primary index.
    UnnestMapOperator primaryIndexUnnestOp = null;
    boolean isPrimaryIndex = chosenIndex.isPrimaryIndex();
    if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
        // External dataset
        ExternalDataLookupOperator externalDataAccessOp = AccessMethodUtils.createExternalDataLookupUnnestMap(
                dataSourceOp, dataset, recordType, secondaryIndexUnnestOp, context, chosenIndex, retainInput,
                retainNull);
        indexSubTree.dataSourceRef.setValue(externalDataAccessOp);
        return externalDataAccessOp;
    } else if (!isPrimaryIndex) {
        primaryIndexUnnestOp = AccessMethodUtils.createPrimaryIndexUnnestMap(dataSourceOp, dataset, recordType,
                secondaryIndexUnnestOp, context, true, retainInput, retainNull, false);
        // Replace the datasource scan with the new plan rooted at primaryIndexUnnestMap.
        indexSubTree.dataSourceRef.setValue(primaryIndexUnnestOp);
    } else {
        List<Object> primaryIndexOutputTypes = new ArrayList<Object>();
        try {
            AccessMethodUtils.appendPrimaryIndexTypes(dataset, recordType, primaryIndexOutputTypes);
        } catch (IOException e) {
            throw new AlgebricksException(e);
        }
        List<LogicalVariable> scanVariables = dataSourceOp.getVariables();
        primaryIndexUnnestOp = new UnnestMapOperator(scanVariables, secondaryIndexUnnestOp.getExpressionRef(),
                primaryIndexOutputTypes, retainInput);
        primaryIndexUnnestOp.getInputs().add(new MutableObject<ILogicalOperator>(inputOp));
        if (!primaryIndexPostProccessingIsNeeded) {
            List<Mutable<ILogicalExpression>> remainingFuncExprs = new ArrayList<Mutable<ILogicalExpression>>();
            getNewConditionExprs(conditionRef, replacedFuncExprs, remainingFuncExprs);
            // Generate new condition.
            if (!remainingFuncExprs.isEmpty()) {
                ILogicalExpression pulledCond = createSelectCondition(remainingFuncExprs);
                conditionRef.setValue(pulledCond);
            } else {
                conditionRef.setValue(null);
            }
        }
        // Adds equivalence classes --- one equivalent class between a primary key
        // variable and a record field-access expression.
        EquivalenceClassUtils.addEquivalenceClassesForPrimaryIndexAccess(primaryIndexUnnestOp, scanVariables,
                recordType, dataset, context);
    }
    return primaryIndexUnnestOp;
}
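In the EQUAL case above, setLowKeys and setHighKeys hold one bit per secondary-key position; when both reach cardinality() == numSecondaryKeys, every key slot is bound and the expression scan stops early. The same idiom appears in the next example as well. A generalized sketch of that bookkeeping (KeySlotTracker and its members are hypothetical names, not AsterixDB API):

import java.util.BitSet;

class KeySlotTracker {
    // One bit per key position; set a bit as each slot is bound.
    // cardinality() == numKeys means every slot is filled.
    private final int numKeys;
    private final BitSet bound;

    KeySlotTracker(int numKeys) {
        this.numKeys = numKeys;
        this.bound = new BitSet(numKeys);
    }

    void bind(int keyPos) {
        bound.set(keyPos);
    }

    boolean allBound() {
        return bound.cardinality() == numKeys;
    }
}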
From source file: org.apache.asterix.optimizer.rules.am.BTreeAccessMethod.java

@Override
public ILogicalOperator createSecondaryToPrimaryPlan(Mutable<ILogicalExpression> conditionRef,
        OptimizableOperatorSubTree indexSubTree, OptimizableOperatorSubTree probeSubTree, Index chosenIndex,
        AccessMethodAnalysisContext analysisCtx, boolean retainInput, boolean retainNull,
        boolean requiresBroadcast, IOptimizationContext context) throws AlgebricksException {
    Dataset dataset = indexSubTree.getDataset();
    ARecordType recordType = indexSubTree.getRecordType();
    ARecordType metaRecordType = indexSubTree.getMetaRecordType();
    // we made sure indexSubTree has datasource scan
    AbstractDataSourceOperator dataSourceOp = (AbstractDataSourceOperator) indexSubTree.getDataSourceRef()
            .getValue();
    List<Pair<Integer, Integer>> exprAndVarList = analysisCtx.indexExprsAndVars.get(chosenIndex);
    List<IOptimizableFuncExpr> matchedFuncExprs = analysisCtx.matchedFuncExprs;
    int numSecondaryKeys = analysisCtx.indexNumMatchedKeys.get(chosenIndex);
    // List of function expressions that will be replaced by the secondary-index search.
    // These func exprs will be removed from the select condition at the very end of this method.
    Set<ILogicalExpression> replacedFuncExprs = new HashSet<>();
    // Info on high and low keys for the BTree search predicate.
    ILogicalExpression[] lowKeyExprs = new ILogicalExpression[numSecondaryKeys];
    ILogicalExpression[] highKeyExprs = new ILogicalExpression[numSecondaryKeys];
    LimitType[] lowKeyLimits = new LimitType[numSecondaryKeys];
    LimitType[] highKeyLimits = new LimitType[numSecondaryKeys];
    boolean[] lowKeyInclusive = new boolean[numSecondaryKeys];
    boolean[] highKeyInclusive = new boolean[numSecondaryKeys];
    ILogicalExpression[] constantAtRuntimeExpressions = new ILogicalExpression[numSecondaryKeys];
    LogicalVariable[] constAtRuntimeExprVars = new LogicalVariable[numSecondaryKeys];
    /* TODO: For now we don't do any sophisticated analysis of the func exprs to come up with "the best" range
     * predicate. If we can't figure out how to integrate a certain funcExpr into the current predicate,
     * we just bail by setting this flag. */
    boolean couldntFigureOut = false;
    boolean doneWithExprs = false;
    boolean isEqCondition = false;
    BitSet setLowKeys = new BitSet(numSecondaryKeys);
    BitSet setHighKeys = new BitSet(numSecondaryKeys);
    // Go through the func exprs listed as optimizable by the chosen index,
    // and formulate a range predicate on the secondary-index keys.
    // checks whether a type casting happened from a real (FLOAT, DOUBLE) value to an INT value
    // since we have a round issues when dealing with LT(<) OR GT(>) operator.
    boolean realTypeConvertedToIntegerType;
    for (Pair<Integer, Integer> exprIndex : exprAndVarList) {
        // Position of the field of matchedFuncExprs.get(exprIndex) in the chosen index's indexed exprs.
        IOptimizableFuncExpr optFuncExpr = matchedFuncExprs.get(exprIndex.first);
        int keyPos = indexOf(optFuncExpr.getFieldName(0), chosenIndex.getKeyFieldNames());
        if (keyPos < 0 && optFuncExpr.getNumLogicalVars() > 1) {
            // If we are optimizing a join, the matching field may be the second field name.
            keyPos = indexOf(optFuncExpr.getFieldName(1), chosenIndex.getKeyFieldNames());
        }
        if (keyPos < 0) {
            throw new AlgebricksException(
                    "Could not match optimizable function expression to any index field name.");
        }
        Pair<ILogicalExpression, Boolean> returnedSearchKeyExpr = AccessMethodUtils
                .createSearchKeyExpr(optFuncExpr, indexSubTree, probeSubTree);
        ILogicalExpression searchKeyExpr = returnedSearchKeyExpr.first;
        if (searchKeyExpr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
            constantAtRuntimeExpressions[keyPos] = searchKeyExpr;
            constAtRuntimeExprVars[keyPos] = context.newVar();
            searchKeyExpr = new VariableReferenceExpression(constAtRuntimeExprVars[keyPos]);
        }
        realTypeConvertedToIntegerType = returnedSearchKeyExpr.second;
        LimitType limit = getLimitType(optFuncExpr, probeSubTree);

        // If a DOUBLE or FLOAT constant is converted to an INT type value,
        // we need to check a corner case where two real values are located between an INT value.
        // For example, for the following query,
        //
        // for $emp in dataset empDataset
        // where $emp.age > double("2.3") and $emp.age < double("3.3")
        // return $emp.id
        //
        // It should generate a result if there is a tuple that satisfies the condition, which is 3,
        // however, it does not generate the desired result since finding candidates
        // fail after truncating the fraction part (there is no INT whose value is greater than 2
        // and less than 3.)
        //
        // Therefore, we convert LT(<) to LE(<=) and GT(>) to GE(>=) to find candidates.
        // This does not change the result of an actual comparison since this conversion is only
        // applied for finding candidates from an index.
        if (realTypeConvertedToIntegerType) {
            if (limit == LimitType.HIGH_EXCLUSIVE) {
                limit = LimitType.HIGH_INCLUSIVE;
            } else if (limit == LimitType.LOW_EXCLUSIVE) {
                limit = LimitType.LOW_INCLUSIVE;
            }
        }

        switch (limit) {
        case EQUAL: {
            if (lowKeyLimits[keyPos] == null && highKeyLimits[keyPos] == null) {
                lowKeyLimits[keyPos] = highKeyLimits[keyPos] = limit;
                lowKeyInclusive[keyPos] = highKeyInclusive[keyPos] = true;
                lowKeyExprs[keyPos] = highKeyExprs[keyPos] = searchKeyExpr;
                setLowKeys.set(keyPos);
                setHighKeys.set(keyPos);
                isEqCondition = true;
            } else {
                // Has already been set to the identical values.
                // When optimizing join we may encounter the same optimizable expression twice
                // (once from analyzing each side of the join)
                if (lowKeyLimits[keyPos] == limit && lowKeyInclusive[keyPos] == true
                        && lowKeyExprs[keyPos].equals(searchKeyExpr) && highKeyLimits[keyPos] == limit
                        && highKeyInclusive[keyPos] == true && highKeyExprs[keyPos].equals(searchKeyExpr)) {
                    isEqCondition = true;
                    break;
                }
                couldntFigureOut = true;
            }
            // TODO: For now don't consider prefix searches.
            // If high and low keys are set, we exit for now.
            if (setLowKeys.cardinality() == numSecondaryKeys
                    && setHighKeys.cardinality() == numSecondaryKeys) {
                doneWithExprs = true;
            }
            break;
        }
        case HIGH_EXCLUSIVE: {
            if (highKeyLimits[keyPos] == null || (highKeyLimits[keyPos] != null && highKeyInclusive[keyPos])) {
                highKeyLimits[keyPos] = limit;
                highKeyExprs[keyPos] = searchKeyExpr;
                highKeyInclusive[keyPos] = false;
            } else {
                // Has already been set to the identical values. When optimizing join we may encounter the
                // same optimizable expression twice
                // (once from analyzing each side of the join)
                if (highKeyLimits[keyPos] == limit && highKeyInclusive[keyPos] == false
                        && highKeyExprs[keyPos].equals(searchKeyExpr)) {
                    break;
                }
                couldntFigureOut = true;
                doneWithExprs = true;
            }
            break;
        }
        case HIGH_INCLUSIVE: {
            if (highKeyLimits[keyPos] == null) {
                highKeyLimits[keyPos] = limit;
                highKeyExprs[keyPos] = searchKeyExpr;
                highKeyInclusive[keyPos] = true;
            } else {
                // Has already been set to the identical values. When optimizing join we may encounter the
                // same optimizable expression twice
                // (once from analyzing each side of the join)
                if (highKeyLimits[keyPos] == limit && highKeyInclusive[keyPos] == true
                        && highKeyExprs[keyPos].equals(searchKeyExpr)) {
                    break;
                }
                couldntFigureOut = true;
                doneWithExprs = true;
            }
            break;
        }
        case LOW_EXCLUSIVE: {
            if (lowKeyLimits[keyPos] == null || (lowKeyLimits[keyPos] != null && lowKeyInclusive[keyPos])) {
                lowKeyLimits[keyPos] = limit;
                lowKeyExprs[keyPos] = searchKeyExpr;
                lowKeyInclusive[keyPos] = false;
            } else {
                // Has already been set to the identical values. When optimizing join we may encounter the
                // same optimizable expression twice
                // (once from analyzing each side of the join)
                if (lowKeyLimits[keyPos] == limit && lowKeyInclusive[keyPos] == false
                        && lowKeyExprs[keyPos].equals(searchKeyExpr)) {
                    break;
                }
                couldntFigureOut = true;
                doneWithExprs = true;
            }
            break;
        }
        case LOW_INCLUSIVE: {
            if (lowKeyLimits[keyPos] == null) {
                lowKeyLimits[keyPos] = limit;
                lowKeyExprs[keyPos] = searchKeyExpr;
                lowKeyInclusive[keyPos] = true;
            } else {
                // Has already been set to the identical values. When optimizing join we may encounter the
                // same optimizable expression twice
                // (once from analyzing each side of the join)
                if (lowKeyLimits[keyPos] == limit && lowKeyInclusive[keyPos] == true
                        && lowKeyExprs[keyPos].equals(searchKeyExpr)) {
                    break;
                }
                couldntFigureOut = true;
                doneWithExprs = true;
            }
            break;
        }
        default: {
            throw new IllegalStateException();
        }
        }
        if (!couldntFigureOut) {
            // Remember to remove this funcExpr later.
            replacedFuncExprs.add(matchedFuncExprs.get(exprIndex.first).getFuncExpr());
        }
        if (doneWithExprs) {
            break;
        }
    }
    if (couldntFigureOut) {
        return null;
    }

    // If the select condition contains mixed open/closed intervals on multiple keys, then we make all
    // intervals closed to obtain a superset of answers and leave the original selection in place.
    boolean primaryIndexPostProccessingIsNeeded = false;
    for (int i = 1; i < numSecondaryKeys; ++i) {
        if (lowKeyInclusive[i] != lowKeyInclusive[0]) {
            Arrays.fill(lowKeyInclusive, true);
            primaryIndexPostProccessingIsNeeded = true;
            break;
        }
    }
    for (int i = 1; i < numSecondaryKeys; ++i) {
        if (highKeyInclusive[i] != highKeyInclusive[0]) {
            Arrays.fill(highKeyInclusive, true);
            primaryIndexPostProccessingIsNeeded = true;
            break;
        }
    }

    // determine cases when prefix search could be applied
    for (int i = 1; i < lowKeyExprs.length; i++) {
        if (lowKeyLimits[0] == null && lowKeyLimits[i] != null
                || lowKeyLimits[0] != null && lowKeyLimits[i] == null
                || highKeyLimits[0] == null && highKeyLimits[i] != null
                || highKeyLimits[0] != null && highKeyLimits[i] == null) {
            numSecondaryKeys--;
            primaryIndexPostProccessingIsNeeded = true;
        }
    }
    if (lowKeyLimits[0] == null) {
        lowKeyInclusive[0] = true;
    }
    if (highKeyLimits[0] == null) {
        highKeyInclusive[0] = true;
    }

    // Here we generate vars and funcs for assigning the secondary-index keys to be fed into the
    // secondary-index search.
    // List of variables for the assign.
    ArrayList<LogicalVariable> keyVarList = new ArrayList<LogicalVariable>();
    // List of variables and expressions for the assign.
    ArrayList<LogicalVariable> assignKeyVarList = new ArrayList<LogicalVariable>();
    ArrayList<Mutable<ILogicalExpression>> assignKeyExprList = new ArrayList<Mutable<ILogicalExpression>>();
    int numLowKeys = createKeyVarsAndExprs(numSecondaryKeys, lowKeyLimits, lowKeyExprs, assignKeyVarList,
            assignKeyExprList, keyVarList, context, constantAtRuntimeExpressions, constAtRuntimeExprVars);
    int numHighKeys = createKeyVarsAndExprs(numSecondaryKeys, highKeyLimits, highKeyExprs, assignKeyVarList,
            assignKeyExprList, keyVarList, context, constantAtRuntimeExpressions, constAtRuntimeExprVars);

    BTreeJobGenParams jobGenParams = new BTreeJobGenParams(chosenIndex.getIndexName(), IndexType.BTREE,
            dataset.getDataverseName(), dataset.getDatasetName(), retainInput, requiresBroadcast);
    jobGenParams.setLowKeyInclusive(lowKeyInclusive[0]);
    jobGenParams.setHighKeyInclusive(highKeyInclusive[0]);
    jobGenParams.setIsEqCondition(isEqCondition);
    jobGenParams.setLowKeyVarList(keyVarList, 0, numLowKeys);
    jobGenParams.setHighKeyVarList(keyVarList, numLowKeys, numHighKeys);

    ILogicalOperator inputOp = null;
    if (!assignKeyVarList.isEmpty()) {
        // Assign operator that sets the constant secondary-index search-key fields if necessary.
        AssignOperator assignConstantSearchKeys = new AssignOperator(assignKeyVarList, assignKeyExprList);
        // Input to this assign is the EmptyTupleSource (which the dataSourceScan also must have had as input).
        assignConstantSearchKeys.getInputs().add(new MutableObject<ILogicalOperator>(
                OperatorManipulationUtil.deepCopy(dataSourceOp.getInputs().get(0).getValue())));
        assignConstantSearchKeys.setExecutionMode(dataSourceOp.getExecutionMode());
        inputOp = assignConstantSearchKeys;
    } else {
        // All index search keys are variables.
        inputOp = probeSubTree.getRoot();
    }

    ILogicalOperator secondaryIndexUnnestOp = AccessMethodUtils.createSecondaryIndexUnnestMap(dataset,
            recordType, metaRecordType, chosenIndex, inputOp, jobGenParams, context, false, retainInput,
            retainNull);

    // Generate the rest of the upstream plan which feeds the search results into the primary index.
    AbstractUnnestMapOperator primaryIndexUnnestOp = null;
    boolean isPrimaryIndex = chosenIndex.isPrimaryIndex();
    if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
        // External dataset
        UnnestMapOperator externalDataAccessOp = AccessMethodUtils.createExternalDataLookupUnnestMap(
                dataSourceOp, dataset, recordType, secondaryIndexUnnestOp, context, chosenIndex, retainInput,
                retainNull);
        indexSubTree.getDataSourceRef().setValue(externalDataAccessOp);
        return externalDataAccessOp;
    } else if (!isPrimaryIndex) {
        primaryIndexUnnestOp = AccessMethodUtils.createPrimaryIndexUnnestMap(dataSourceOp, dataset, recordType,
                metaRecordType, secondaryIndexUnnestOp, context, true, retainInput, retainNull, false);
        // Adds equivalence classes --- one equivalent class between a primary key
        // variable and a record field-access expression.
        EquivalenceClassUtils.addEquivalenceClassesForPrimaryIndexAccess(primaryIndexUnnestOp,
                dataSourceOp.getVariables(), recordType, metaRecordType, dataset, context);
    } else {
        List<Object> primaryIndexOutputTypes = new ArrayList<Object>();
        AccessMethodUtils.appendPrimaryIndexTypes(dataset, recordType, metaRecordType, primaryIndexOutputTypes);
        List<LogicalVariable> scanVariables = dataSourceOp.getVariables();
        // Checks whether the primary index search can replace the given SELECT condition.
        // If so, condition will be set to null and eventually the SELECT operator will be removed.
        // If not, we create a new condition based on remaining ones.
        if (!primaryIndexPostProccessingIsNeeded) {
            List<Mutable<ILogicalExpression>> remainingFuncExprs = new ArrayList<Mutable<ILogicalExpression>>();
            getNewConditionExprs(conditionRef, replacedFuncExprs, remainingFuncExprs);
            // Generate new condition.
            if (!remainingFuncExprs.isEmpty()) {
                ILogicalExpression pulledCond = createSelectCondition(remainingFuncExprs);
                conditionRef.setValue(pulledCond);
            } else {
                conditionRef.setValue(null);
            }
        }
        // Checks whether LEFT_OUTER_UNNESTMAP operator is required.
        boolean leftOuterUnnestMapRequired = false;
        if (retainNull && retainInput) {
            leftOuterUnnestMapRequired = true;
        } else {
            leftOuterUnnestMapRequired = false;
        }
        if (conditionRef.getValue() != null) {
            // The job gen parameters are transferred to the actual job gen
            // via the UnnestMapOperator's function arguments.
            List<Mutable<ILogicalExpression>> primaryIndexFuncArgs = new ArrayList<Mutable<ILogicalExpression>>();
            jobGenParams.writeToFuncArgs(primaryIndexFuncArgs);
            // An index search is expressed as an unnest-map over an index-search function.
            IFunctionInfo primaryIndexSearch = FunctionUtil
                    .getFunctionInfo(AsterixBuiltinFunctions.INDEX_SEARCH);
            UnnestingFunctionCallExpression primaryIndexSearchFunc = new UnnestingFunctionCallExpression(
                    primaryIndexSearch, primaryIndexFuncArgs);
            primaryIndexSearchFunc.setReturnsUniqueValues(true);
            if (!leftOuterUnnestMapRequired) {
                primaryIndexUnnestOp = new UnnestMapOperator(scanVariables,
                        new MutableObject<ILogicalExpression>(primaryIndexSearchFunc), primaryIndexOutputTypes,
                        retainInput);
            } else {
                primaryIndexUnnestOp = new LeftOuterUnnestMapOperator(scanVariables,
                        new MutableObject<ILogicalExpression>(primaryIndexSearchFunc), primaryIndexOutputTypes,
                        true);
            }
        } else {
            if (!leftOuterUnnestMapRequired) {
                primaryIndexUnnestOp = new UnnestMapOperator(scanVariables,
                        ((UnnestMapOperator) secondaryIndexUnnestOp).getExpressionRef(),
                        primaryIndexOutputTypes, retainInput);
            } else {
                primaryIndexUnnestOp = new LeftOuterUnnestMapOperator(scanVariables,
                        ((LeftOuterUnnestMapOperator) secondaryIndexUnnestOp).getExpressionRef(),
                        primaryIndexOutputTypes, true);
            }
        }
        primaryIndexUnnestOp.getInputs().add(new MutableObject<ILogicalOperator>(inputOp));
        // Adds equivalence classes --- one equivalent class between a primary key
        // variable and a record field-access expression.
        EquivalenceClassUtils.addEquivalenceClassesForPrimaryIndexAccess(primaryIndexUnnestOp, scanVariables,
                recordType, metaRecordType, dataset, context);
    }
    return primaryIndexUnnestOp;
}
From source file:gov.noaa.pfel.erddap.dataset.EDDTableFromNcFiles.java
/** NOT FOR GENERAL USE. Bob uses this to consolidate the individual GTSPP * data files into 30 x 30 x 1 month files (tiles). * 30 x 30 leads to 12x6=72 files for a given time point, so a request * for a short time but entire world opens ~72 files. * There are ~240 months worth of data, so a request for a small lon lat * range for all time opens ~240 files.//from w w w . jav a 2s. c o m * * <p>Why tile? Because there are ~10^6 profiles/year now, so ~10^7 total. * And if 100 bytes of info per file for EDDTableFromFiles fileTable, that's 1 GB!. * So there needs to be fewer files. * We want to balance number of files for 1 time point (all region tiles), * and number of time point files (I'll stick with their use of 1 month). * The tiling size selected is ok, but searches for single profile (by name) * are slow since a given file may have a wide range of station_ids. * * <p>Quality flags * <br>https://www.nodc.noaa.gov/GTSPP/document/qcmans/GTSPP_RT_QC_Manual_20090916.pdf * <br>http://www.ifremer.fr/gosud/formats/gtspp_qcflags.htm * <br>CODE SIGNIFICATION * <br>0 NOT CONTROLLED VALUE * <br>1 CORRECT VALUE * <br>2 VALUE INCONSISTENT WITH STATISTICS * <br>3 DOUBTFUL VALUE (spike, ...) * <br>4 FALSE VALUE (out of scale, constant profile, vertical instability, ...) * <br>5 VALUE MODIFIED DURING QC (only for interpolate location or date) * <br>6-8 Not USED * <br>9 NO VALUE * <br> * <br>I interpret as: okay values are 1, 2, 5 * * @param firstYear e.g., 1990 * @param firstMonth e.g., 1 (1..) * @param lastYear e.g., 2010 * @param lastMonth e.g., 12 (1..) * @param testMode if true, this just processes .nc files * already in testTempDir f:/data/gtspp/testTemp/ * and puts results in testDestDir f:/data/gtspp/testDest/. * So the first/last/Year/Month params are ignored. */ public static void bobConsolidateGtsppTgz(int firstYear, int firstMonth, int lastYear, int lastMonth, boolean testMode) throws Throwable { int chunkSize = 45; //lon width, lat height of a tile, in degrees int minLat = -90; int maxLat = 90; int minLon = -180; int maxLon = 180; String today = Calendar2.getCurrentISODateTimeStringZulu().substring(0, 10); //to nearest day String sevenZip = "c:\\progra~1\\7-Zip\\7z"; String zipDir = "c:\\data\\gtspp\\bestNcZip\\"; //gtspp_at199001.tgz String destDir = "c:\\data\\gtspp\\bestNcConsolidated\\"; String tempDir = "c:\\data\\gtspp\\temp\\"; String testTempDir = "c:\\data\\gtspp\\testTemp\\"; //tempDir if testMode=true String testDestDir = "c:\\data\\gtspp\\testDest\\"; //destDir if testMode=true String logFile = "c:\\data\\gtspp\\log" + String2.replaceAll(today, "-", "") + ".txt"; File2.makeDirectory(tempDir); //https://www.nodc.noaa.gov/GTSPP/document/qcmans/qcflags.htm //1=correct, 2=probably correct, 5=modified (so now correct) //pre 2012-04-15 was {1,2,5} //pre 2012-05-25 was {1,2} int okQF[] = { 1, 2, 5 }; String okQFCsv = String2.toCSSVString(okQF); float depthMV = 99999; //was -99; float temperatureMV = 99999; //was -99; float salinityMV = 99999; //was -99; int qMV = 9; String timeUnits = "days since 1900-01-01 00:00:00"; //causes roundoff error(!) 
double timeBaseAndFactor[] = Calendar2.getTimeBaseAndFactor(timeUnits); //impossible values: float minDepth = -0.4f, maxDepth = 10000; //-0.4 allows for imprecise values float minTemperature = -4, maxTemperature = 40; float minSalinity = 0, maxSalinity = 41; if (testMode) { firstYear = 1990; firstMonth = 1; lastYear = 1990; lastMonth = 1; } SSR.verbose = false; String2.setupLog(true, false, logFile, false, 1000000000); String2.log("*** starting bobConsolidateGtsppTgz " + Calendar2.getCurrentISODateTimeStringLocalTZ() + "\n" + "logFile=" + String2.logFileName() + "\n" + String2.standardHelpAboutMessage()); long elapsedTime = System.currentTimeMillis(); //q_pos (position quality flag), q_date_time (time quality flag) int stationCol = -1, organizationCol = -1, dataTypeCol = -1, platformCol = -1, cruiseCol = -1, longitudeCol = -1, latitudeCol = -1, timeCol = -1, depthCol = -1, temperatureCol = -1, salinityCol = -1; int totalNGoodStation = 0, totalNGoodPos = 0, totalNGoodTime = 0, totalNGoodDepth = 0, totalNGoodTemperature = 0, totalNGoodSalinity = 0; int totalNBadStation = 0, totalNBadPos = 0, totalNBadTime = 0, totalNBadDepth = 0, totalNBadTemperature = 0, totalNBadSalinity = 0, totalNWarnings = 0, totalNExceptions = 0; long totalNGoodRows = 0, totalNBadRows = 0; StringArray impossibleNanLat = new StringArray(); StringArray impossibleMinLat = new StringArray(); StringArray impossibleMaxLat = new StringArray(); StringArray impossibleNanLon = new StringArray(); StringArray impossibleMinLon = new StringArray(); StringArray impossibleMaxLon = new StringArray(); //StringArray impossibleNaNDepth = new StringArray(); StringArray impossibleMinDepth = new StringArray(); StringArray impossibleMaxDepth = new StringArray(); //StringArray impossibleNanTemperature = new StringArray(); StringArray impossibleMinTemperature = new StringArray(); StringArray impossibleMaxTemperature = new StringArray(); //StringArray impossibleNanSalinity = new StringArray(); StringArray impossibleMinSalinity = new StringArray(); StringArray impossibleMaxSalinity = new StringArray(); int nLons = 0, nLats = 0, nFiles = 0; int lonSum = 0, latSum = 0; long profilesSum = 0; long rowsSum = 0; //*** process a month's data int year = firstYear; int month = firstMonth; long chunkTime = System.currentTimeMillis(); while (year <= lastYear) { String2.log("\n*** " + Calendar2.getCurrentISODateTimeStringLocalTZ() + " start processing year=" + year + " month=" + month); String zMonth = String2.zeroPad("" + month, 2); String zMonth1 = String2.zeroPad("" + (month + 1), 2); double minEpochSeconds = Calendar2.isoStringToEpochSeconds(year + "-" + zMonth + "-01"); double maxEpochSeconds = Calendar2.isoStringToEpochSeconds(year + "-" + zMonth1 + "-01"); //destination directory String tDestDir = testMode ? 
testDestDir : destDir + year + "\\" + zMonth + "\\"; File2.makeDirectory(tDestDir); HashMap tableHashMap = new HashMap(); //make sure all files are deleted int waitSeconds = 2; int nAttempts = 10; long cmdTime = System.currentTimeMillis(); String cmd = "del/q " + tDestDir + "*.*"; for (int attempt = 0; attempt < nAttempts; attempt++) { if (attempt % 8 == 0) { String2.log(cmd); SSR.dosShell(cmd, 30 * 60); //10 minutes*60 seconds //File2.deleteAllFiles(tempDir); //previous method } Math2.gc(waitSeconds * 1000); //gtspp: give OS time to settle File destDirFile = new File(tDestDir); File files[] = destDirFile.listFiles(); String2.log(" nRemainingFiles=" + files.length); if (files.length == 0) break; waitSeconds = 2 * nAttempts; } String2.log(" cmd total time=" + Calendar2.elapsedTimeString(System.currentTimeMillis() - cmdTime)); //unzip all atlantic, indian, and pacific .zip files for that month String region2[] = { "at", "in", "pa" }; int nRegions = testMode ? 1 : 3; for (int region = 0; region < nRegions; region++) { String sourceBaseName = "gtspp4_" + region2[region] + year + zMonth; String sourceZipJustFileName = sourceBaseName + ".tgz"; String sourceZipName = zipDir + sourceZipJustFileName; if (!testMode) { //delete all files in tempDir waitSeconds = 2; nAttempts = 10; cmdTime = System.currentTimeMillis(); cmd = "del/q " + tempDir + "*.*"; String2.log(""); //blank line for (int attempt = 0; attempt < nAttempts; attempt++) { String2.log(cmd); SSR.dosShell(cmd, 30 * 60); //30 minutes*60 seconds //File2.deleteAllFiles(tempDir); //previous method //delete dirs too File2.deleteAllFiles(tempDir, true, true); Math2.gc(waitSeconds * 1000); //gtspp: give OS time to settle String2.log(" " + Math2.memoryString()); File tempDirFile = new File(tempDir); File files[] = tempDirFile.listFiles(); String2.log(" nRemainingFiles=" + files.length); if (files.length == 0) break; waitSeconds = 2 * nAttempts; } String2.log(" cmd total time=" + Calendar2.elapsedTimeString(System.currentTimeMillis() - cmdTime)); //unzip file into tempDir //gtspp_at199001.zip cmd = sevenZip + " -y e " + sourceZipName + " -o" + tempDir + " -r"; cmdTime = System.currentTimeMillis(); String2.log("\n*** " + cmd); if (File2.isFile(sourceZipName)) { try { SSR.dosShell(cmd, 30 * 60); //10 minutes*60 seconds String2.log(" cmd time=" + Calendar2.elapsedTimeString(System.currentTimeMillis() - cmdTime)); //extract from the .tar file //gtspp4_at199001.tar cmd = sevenZip + " -y e " + tempDir + sourceBaseName + ".tar -o" + tempDir + " -r"; cmdTime = System.currentTimeMillis(); String2.log("\n*** " + cmd); SSR.dosShell(cmd, 120 * 60); //120 minutes*60 seconds String2.log(" cmd time=" + Calendar2.elapsedTimeString(System.currentTimeMillis() - cmdTime)); } catch (Exception e) { String2.log("Caught exception: " + MustBe.throwableToString(e)); } } //previous method //SSR.unzip(sourceZipName, // tempDir, true, 100 * 60, null); //ignoreZipDirectories, timeOutSeconds 100 minutes } //read each file and put data in proper table String tTempDir = testMode ? testTempDir : tempDir; File tTempDirAsFile = new File(tTempDir); String sourceFileNames[] = tTempDirAsFile.list(); //just the file names String2.log("\nunzipped " + sourceFileNames.length + " files"); int nSourceFileNames = //testMode? 
100 : sourceFileNames.length; int nGoodStation = 0, nGoodPos = 0, nGoodTime = 0, nGoodDepth = 0, nGoodTemperature = 0, nGoodSalinity = 0, nGoodRows = 0; int nBadStation = 0, nBadPos = 0, nBadTime = 0, nBadDepth = 0, nBadTemperature = 0, nBadSalinity = 0, nBadRows = 0, nWarnings = 0, nExceptions = 0; long fileReadTime = System.currentTimeMillis(); profilesSum += nSourceFileNames; for (int sfi = 0; sfi < nSourceFileNames; sfi++) { String sourceFileName = sourceFileNames[sfi]; if (sfi % 10000 == 0) { //if (sfi > 0) //2012-12-13 commented out. Let Java handle it. // Math2.gc(3 * 1000); //gtspp: give OS time to settle //high water mark is ~160 MB, so memory not a problem String2.log("file #" + sfi + " " + Math2.memoryString()); } if (!sourceFileName.endsWith(".nc")) { //String2.log("ERROR: not a .nc file: " + sourceFileName); continue; } NetcdfFile ncFile = null; try { //get the station name //gtspp_13635162_te_111.nc gtspp_10313692_cu_111.nc if (!sourceFileName.matches("gtspp_[0-9]+_.*\\.nc")) { //was "\\d+")) {//all digits nBadStation++; throw new SimpleException("Invalid sourceFileName=" + sourceFileName); } int po = sourceFileName.indexOf('_', 6); if (po < 0) { nBadStation++; throw new SimpleException("Invalid sourceFileName=" + sourceFileName); } int station = String2.parseInt(sourceFileName.substring(6, po)); nGoodStation++; String key = sourceZipJustFileName + " " + sourceFileName; //open the file ncFile = NcHelper.openFile(tTempDir + sourceFileName); Variable var; Attributes tVarAtts = new Attributes(); String tUnits; //get all of the data //stream_ident var = ncFile.findVariable("stream_ident"); String organization = ""; String dataType = ""; if (var == null) { nWarnings++; String2.log("WARNING: No stream_ident in " + sourceFileName); } else { PrimitiveArray streamPA = NcHelper.getPrimitiveArray(var); if (streamPA instanceof StringArray && streamPA.size() > 0) { String stream = streamPA.getString(0); if (stream.length() >= 4) { organization = stream.substring(0, 2).trim(); dataType = stream.substring(2, 4).trim(); } else { String2.log("WARNING: stream_ident isn't a 4 char string: " + stream); } } else { String2.log("WARNING: stream_ident isn't a StringArray: " + streamPA.toString()); } } //platform_code var = ncFile.findVariable("gtspp_platform_code"); String platform = ""; if (var == null) { //a small percentage have this problem //nWarnings++; //String2.log("WARNING: No gtspp_platform_code in " + sourceFileName); } else { PrimitiveArray pa = NcHelper.getPrimitiveArray(var); if (pa instanceof StringArray && pa.size() > 0) { platform = pa.getString(0).trim(); //String2.log("platform_code=" + platform_code); } else { String2.log("WARNING: gtspp_platform_code isn't a StringArray: " + pa.toString()); } } //cruise var = ncFile.findVariable("cruise_id"); String cruise = ""; if (var == null) { nWarnings++; String2.log("WARNING: No cruise_id in " + sourceFileName); } else { PrimitiveArray cruisePA = NcHelper.getPrimitiveArray(var); if (cruisePA instanceof StringArray && cruisePA.size() > 0) { cruise = cruisePA.getString(0).trim(); } else { String2.log("WARNING: cruise_id isn't a StringArray: " + cruisePA.toString()); } } //prof_type is TEMP or PSAL so don't save it. 
/*var = ncFile.findVariable("prof_type"); String prof_type = ""; if (var == null) { nWarnings++; String2.log("WARNING: No prof_type in " + sourceFileName); } else { PrimitiveArray pa = NcHelper.getPrimitiveArray(var); if (pa instanceof StringArray && pa.size() > 0) { prof_type = pa.getString(0).trim(); String2.log("prof_type=" + prof_type); } else { String2.log("WARNING: prof_type isn't a StringArray: " + pa.toString()); } }*/ //position quality flag var = ncFile.findVariable("position_quality_flag"); //was "q_pos"); if (var == null) { nWarnings++; String2.log("WARNING: No position_quality_flag in " + sourceFileName); } else { PrimitiveArray q_pos = NcHelper.getPrimitiveArray(var); if (!(q_pos instanceof IntArray) || q_pos.size() != 1) throw new SimpleException("Invalid position_quality_flag=" + q_pos); int ti = q_pos.getInt(0); if (String2.indexOf(okQF, ti) < 0) { nBadPos++; continue; } //nGoodPos++; is below } //time quality flag var = ncFile.findVariable("time_quality_flag"); //q_date_time"); if (var == null) { nWarnings++; String2.log("WARNING: No time_quality_flag in " + sourceFileName); } else { PrimitiveArray q_date_time = NcHelper.getPrimitiveArray(var); if (!(q_date_time instanceof IntArray) || q_date_time.size() != 1) throw new SimpleException("Invalid time_quality_flag=" + q_date_time); int ti = q_date_time.getInt(0); if (String2.indexOf(okQF, ti) < 0) { nBadTime++; continue; } //nGoodTime is below } //time var = ncFile.findVariable("time"); if (var == null) throw new SimpleException("No time!"); tVarAtts.clear(); NcHelper.getVariableAttributes(var, tVarAtts); tUnits = tVarAtts.getString("units"); if (!timeUnits.equals(tUnits)) throw new SimpleException("Invalid time units=" + tUnits); PrimitiveArray time = NcHelper.getPrimitiveArray(var); if (!(time instanceof DoubleArray) || time.size() != 1) throw new SimpleException("Invalid time=" + time); double tTime = Calendar2.unitsSinceToEpochSeconds(timeBaseAndFactor[0], timeBaseAndFactor[1], time.getDouble(0)); if (tTime < minEpochSeconds || tTime > maxEpochSeconds) throw new SimpleException( "Invalid tTime=" + Calendar2.safeEpochSecondsToIsoStringTZ(tTime, "")); //original times (that I looked at) are to nearest second //so round to nearest second (fix .99999 problems) tTime = Math.rint(tTime); nGoodTime++; //longitude (position qFlag is good) var = ncFile.findVariable("longitude"); if (var == null) { impossibleNanLon.add(key + " lon=null"); continue; } PrimitiveArray longitude = NcHelper.getPrimitiveArray(var); if (!(longitude instanceof FloatArray) || longitude.size() != 1) { impossibleNanLon.add(key + " lon=wrongTypeOrSize"); continue; } float lon = longitude.getFloat(0); if (Float.isNaN(lon)) { impossibleNanLon.add(key + " lon=NaN"); continue; } else if (lon < minLon) { impossibleMinLon.add(key + " lon=" + lon); //fall through } else if (lon > maxLon) { impossibleMaxLon.add(key + " lon=" + lon); //fall through } lon = (float) Math2.anglePM180(lon); //latitude (position qFlag is good) var = ncFile.findVariable("latitude"); if (var == null) { impossibleNanLat.add(key + " lat=null"); continue; } PrimitiveArray latitude = NcHelper.getPrimitiveArray(var); if (!(latitude instanceof FloatArray) || latitude.size() != 1) { impossibleNanLat.add(key + " lat=wrongTypeOrSize"); continue; } float lat = latitude.getFloat(0); if (Float.isNaN(lat)) { impossibleNanLat.add(key + " lat=NaN"); continue; } else if (lat < minLat) { impossibleMinLat.add(key + " lat=" + lat); continue; } else if (lat > maxLat) { impossibleMaxLat.add(key + " lat=" + 
lat); continue; } nGoodPos++; //depth var = ncFile.findVariable("z"); if (var == null) throw new SimpleException("No z!"); PrimitiveArray depth = NcHelper.getPrimitiveArray(var); if (!(depth instanceof FloatArray) || depth.size() == 0) throw new SimpleException("Invalid z=" + depth); int nDepth = depth.size(); //DEPH_qparm var = ncFile.findVariable("z_variable_quality_flag"); //DEPH_qparm"); if (var == null) throw new SimpleException("No z_variable_quality_flag!"); PrimitiveArray DEPH_qparm = NcHelper.getPrimitiveArray(var); if (!(DEPH_qparm instanceof IntArray) || DEPH_qparm.size() != nDepth) throw new SimpleException("Invalid z_variable_quality_flag=" + DEPH_qparm); //nGoodDepth is below //temperature var = ncFile.findVariable("temperature"); PrimitiveArray temperature; PrimitiveArray TEMP_qparm; float temperatureFV = temperatureMV; if (var == null) { //nWarnings++; //String2.log("WARNING: No temperature in " + sourceFileName); reasonably common temperature = PrimitiveArray.factory(float.class, nDepth, "" + temperatureMV); TEMP_qparm = PrimitiveArray.factory(int.class, nDepth, "" + qMV); } else { temperature = NcHelper.getPrimitiveArray(var); if (!(temperature instanceof FloatArray) || temperature.size() != nDepth) throw new SimpleException("Invalid temperature=" + temperature); tVarAtts.clear(); NcHelper.getVariableAttributes(var, tVarAtts); temperatureFV = tVarAtts.getFloat("_FillValue"); if (!Float.isNaN(temperatureFV) && temperatureFV != temperatureMV) throw new SimpleException("Invalid temperature _FillValue=" + temperatureFV); //TEMP_qparm var = ncFile.findVariable("temperature_quality_flag"); //TEMP_qparm"); if (var == null) { nWarnings++; String2.log("WARNING: No temperature_quality_flag in " + sourceFileName); TEMP_qparm = PrimitiveArray.factory(int.class, nDepth, "" + qMV); } else { TEMP_qparm = NcHelper.getPrimitiveArray(var); if (!(TEMP_qparm instanceof IntArray) || TEMP_qparm.size() != nDepth) throw new SimpleException("Invalid temperature_quality_flag=" + TEMP_qparm); } } //salinity var = ncFile.findVariable("salinity"); PrimitiveArray salinity; PrimitiveArray PSAL_qparm; float salinityFV = salinityMV; if (var == null) { //String2.log("WARNING: No salinity in " + sourceFileName); //very common salinity = PrimitiveArray.factory(float.class, nDepth, "" + salinityMV); PSAL_qparm = PrimitiveArray.factory(int.class, nDepth, "" + qMV); } else { salinity = NcHelper.getPrimitiveArray(var); if (!(salinity instanceof FloatArray) || salinity.size() != nDepth) throw new SimpleException("Invalid salinity=" + salinity); tVarAtts.clear(); NcHelper.getVariableAttributes(var, tVarAtts); salinityFV = tVarAtts.getFloat("_FillValue"); if (!Float.isNaN(salinityFV) && salinityFV != salinityMV) throw new SimpleException("Invalid salinity _FillValue=" + salinityFV); //PSAL_qparm var = ncFile.findVariable("salinity_quality_flag"); //PSAL_qparm"); if (var == null) { nWarnings++; String2.log("WARNING: No salinity_quality_flag in " + sourceFileName); PSAL_qparm = PrimitiveArray.factory(int.class, nDepth, "" + qMV); } else { PSAL_qparm = NcHelper.getPrimitiveArray(var); if (!(PSAL_qparm instanceof IntArray) || PSAL_qparm.size() != nDepth) throw new SimpleException("Invalid salinity_quality_flag=" + PSAL_qparm); } } //clean the data //(good to do it here so memory usage is low -- table remains as small as possible) //Change "impossible" data to NaN //(from https://www.nodc.noaa.gov/GTSPP/document/qcmans/GTSPP_RT_QC_Manual_20090916.pdf //pg 61 has Table 2.1: Global Impossible Parameter Values). 
BitSet keep = new BitSet();
keep.set(0, nDepth); //all true

//find worst impossible depth/temperature/salinity for this station
//boolean tImpossibleNanDepth = false;
//boolean tImpossibleNanTemperature = false;
//boolean tImpossibleNanSalinity = false;
float tImpossibleMinDepth = minDepth;
float tImpossibleMaxDepth = maxDepth;
float tImpossibleMinTemperature = minTemperature;
float tImpossibleMaxTemperature = maxTemperature;
float tImpossibleMinSalinity = minSalinity;
float tImpossibleMaxSalinity = maxSalinity;

for (int row = 0; row < nDepth; row++) {

    //DEPH_qparm
    int qs = DEPH_qparm.getInt(row);
    float f = depth.getFloat(row);
    if (String2.indexOf(okQF, qs) < 0) {
        nBadDepth++;
        keep.clear(row);
        continue;
    } else if (Float.isNaN(f) || f == depthMV) { //"impossible" depth
        //tImpossibleNanDepth = true;
        nBadDepth++;
        keep.clear(row);
        continue;
    } else if (f < minDepth) {
        tImpossibleMinDepth = Math.min(tImpossibleMinDepth, f);
        nBadDepth++;
        keep.clear(row);
        continue;
    } else if (f > maxDepth) {
        tImpossibleMaxDepth = Math.max(tImpossibleMaxDepth, f);
        nBadDepth++;
        keep.clear(row);
        continue;
    }
    nGoodDepth++;

    boolean hasData = false;

    //temperature
    qs = TEMP_qparm.getInt(row);
    f = temperature.getFloat(row);
    if (String2.indexOf(okQF, qs) < 0) {
        temperature.setString(row, ""); //so bad value is now NaN
        nBadTemperature++;
    } else if (Float.isNaN(f) || f == temperatureMV) {
        temperature.setString(row, ""); //so missing value is now NaN
        nBadTemperature++;
    } else if (f < minTemperature) { //"impossible" water temperature
        tImpossibleMinTemperature = Math.min(tImpossibleMinTemperature, f);
        temperature.setString(row, ""); //so impossible value is now NaN
        nBadTemperature++;
    } else if (f > maxTemperature) { //"impossible" water temperature
        tImpossibleMaxTemperature = Math.max(tImpossibleMaxTemperature, f);
        temperature.setString(row, ""); //so impossible value is now NaN
        nBadTemperature++;
    } else {
        nGoodTemperature++;
        hasData = true;
    }

    //salinity
    qs = PSAL_qparm.getInt(row);
    f = salinity.getFloat(row);
    if (String2.indexOf(okQF, qs) < 0) {
        salinity.setString(row, ""); //so bad value is now NaN
        nBadSalinity++;
    } else if (Float.isNaN(f) || f == salinityMV) {
        salinity.setString(row, ""); //so missing value is now NaN
        nBadSalinity++;
    } else if (f < minSalinity) { //"impossible" salinity
        tImpossibleMinSalinity = Math.min(tImpossibleMinSalinity, f);
        salinity.setString(row, ""); //so impossible value is now NaN
        nBadSalinity++;
    } else if (f > maxSalinity) { //"impossible" salinity
        tImpossibleMaxSalinity = Math.max(tImpossibleMaxSalinity, f);
        salinity.setString(row, ""); //so impossible value is now NaN
        nBadSalinity++;
    } else {
        nGoodSalinity++;
        hasData = true;
    }

    //no valid temperature or salinity data?
    if (!hasData) {
        keep.clear(row);
    }
}

//ensure sizes still correct
Test.ensureEqual(depth.size(), nDepth, "depth.size changed!");
Test.ensureEqual(temperature.size(), nDepth, "temperature.size changed!");
Test.ensureEqual(salinity.size(), nDepth, "salinity.size changed!");

//actually remove the bad rows
int tnGood = keep.cardinality();
if (testMode && verbose)
    String2.log(sourceFileName + ": nGoodRows=" + tnGood +
        " nBadRows=" + (nDepth - tnGood));
nGoodRows += tnGood;
nBadRows += nDepth - tnGood;
depth.justKeep(keep);
temperature.justKeep(keep);
salinity.justKeep(keep);
nDepth = depth.size();

//impossible
//if (tImpossibleNanDepth)
//    impossibleNanDepth.add(key + " hasNaN=true");
//if (tImpossibleNanTemperature)
//    impossibleNanTemperature.add(key + " hasNaN=true");
//if (tImpossibleNanSalinity)
//    impossibleNanSalinity.add(key + " hasNaN=true");
if (tImpossibleMinDepth < minDepth)
    impossibleMinDepth.add(key + " worst = " + tImpossibleMinDepth);
if (tImpossibleMaxDepth > maxDepth)
    impossibleMaxDepth.add(key + " worst = " + tImpossibleMaxDepth);
if (tImpossibleMinTemperature < minTemperature)
    impossibleMinTemperature.add(key + " worst = " + tImpossibleMinTemperature);
if (tImpossibleMaxTemperature > maxTemperature)
    impossibleMaxTemperature.add(key + " worst = " + tImpossibleMaxTemperature);
if (tImpossibleMinSalinity < minSalinity)
    impossibleMinSalinity.add(key + " worst = " + tImpossibleMinSalinity);
if (tImpossibleMaxSalinity > maxSalinity)
    impossibleMaxSalinity.add(key + " worst = " + tImpossibleMaxSalinity);

//which table
if (tnGood == 0)
    continue;
int loni = Math2.roundToInt(Math.floor((Math.min(lon, maxLon - 0.1f) - minLon) / chunkSize));
int lati = Math2.roundToInt(Math.floor((Math.min(lat, maxLat - 0.1f) - minLat) / chunkSize));
String outTableName = (minLon + loni * chunkSize) + "E_" + (minLat + lati * chunkSize) + "N";
    //String2.replaceAll(cruise + "_" + organization + dataType, ' ', '_'); //too many: 3000+/month in 2011
Table tTable = (Table) tableHashMap.get(outTableName);
if (tTable == null) {

    Attributes ncGlobalAtts = new Attributes();
    NcHelper.getGlobalAttributes(ncFile, ncGlobalAtts);
    String tHistory = ncGlobalAtts.getString("history");
    tHistory = tHistory != null && tHistory.length() > 0 ? tHistory + "\n" : "";

    //make a table for this platform
    tTable = new Table();
    Attributes ga = tTable.globalAttributes();
    String ack = "These data were acquired from the US NOAA National Oceanographic Data Center (NODC) on "
        + today + " from https://www.nodc.noaa.gov/GTSPP/.";
    ga.add("acknowledgment", ack);
    ga.add("license", "These data are openly available to the public. " +
" + "Please acknowledge the use of these data with:\n" + ack + "\n\n" + "[standard]"); ga.add("history", tHistory + ".tgz files from ftp.nodc.noaa.gov /pub/gtspp/best_nc/ (https://www.nodc.noaa.gov/GTSPP/)\n" + today + " Most recent ingest, clean, and reformat at ERD (bob.simons at noaa.gov)."); ga.add("infoUrl", "https://www.nodc.noaa.gov/GTSPP/"); ga.add("institution", "NOAA NODC"); ga.add("title", "Global Temperature and Salinity Profile Programme (GTSPP) Data"); String attName = "gtspp_ConventionVersion"; String attValue = ncGlobalAtts.getString(attName); if (attValue != null && attValue.length() > 0) ga.add(attName, attValue); attName = "gtspp_program"; attValue = ncGlobalAtts.getString(attName); if (attValue != null && attValue.length() > 0) ga.add(attName, attValue); attName = "gtspp_programVersion"; attValue = ncGlobalAtts.getString(attName); if (attValue != null && attValue.length() > 0) ga.add(attName, attValue); attName = "gtspp_handbook_version"; attValue = ncGlobalAtts.getString(attName); if (attValue != null && attValue.length() > 0) ga.add(attName, attValue); organizationCol = tTable.addColumn(tTable.nColumns(), "org", new StringArray(), new Attributes()); platformCol = tTable.addColumn(tTable.nColumns(), "platform", new StringArray(), new Attributes()); dataTypeCol = tTable.addColumn(tTable.nColumns(), "type", new StringArray(), new Attributes()); cruiseCol = tTable.addColumn(tTable.nColumns(), "cruise", new StringArray(), new Attributes()); stationCol = tTable.addColumn(tTable.nColumns(), "station_id", new IntArray(), new Attributes()); longitudeCol = tTable.addColumn(tTable.nColumns(), "longitude", new FloatArray(), (new Attributes()).add("units", EDV.LON_UNITS)); latitudeCol = tTable.addColumn(tTable.nColumns(), "latitude", new FloatArray(), (new Attributes()).add("units", EDV.LAT_UNITS)); timeCol = tTable.addColumn(tTable.nColumns(), "time", new DoubleArray(), (new Attributes()).add("units", EDV.TIME_UNITS)); depthCol = tTable.addColumn(tTable.nColumns(), "depth", new FloatArray(), (new Attributes()).add("units", "m")); temperatureCol = tTable.addColumn(tTable.nColumns(), "temperature", new FloatArray(), (new Attributes()).add("units", "degree_C")); salinityCol = tTable.addColumn(tTable.nColumns(), "salinity", new FloatArray(), (new Attributes()).add("units", "1e-3")); //PSU changed to 1e-3 with CF std names 25 tableHashMap.put(outTableName, tTable); } //put data in tTable int oNRows = tTable.nRows(); ((StringArray) tTable.getColumn(organizationCol)).addN(nDepth, organization); ((StringArray) tTable.getColumn(platformCol)).addN(nDepth, platform); ((StringArray) tTable.getColumn(dataTypeCol)).addN(nDepth, dataType); ((StringArray) tTable.getColumn(cruiseCol)).addN(nDepth, cruise); ((IntArray) tTable.getColumn(stationCol)).addN(nDepth, station); ((FloatArray) tTable.getColumn(longitudeCol)).addN(nDepth, lon); ((FloatArray) tTable.getColumn(latitudeCol)).addN(nDepth, lat); ((DoubleArray) tTable.getColumn(timeCol)).addN(nDepth, tTime); ((FloatArray) tTable.getColumn(depthCol)).append(depth); ((FloatArray) tTable.getColumn(temperatureCol)).append(temperature); ((FloatArray) tTable.getColumn(salinityCol)).append(salinity); //ensure the table is valid (same size for each column) tTable.ensureValid(); } catch (Throwable t) { nExceptions++; String2.log( "ERROR while processing " + sourceFileName + "\n " + MustBe.throwableToString(t)); } finally { //always close the ncFile if (ncFile != null) { try { ncFile.close(); } catch (Throwable t) { String2.log("ERROR: unable to close " + 
sourceFileName + "\n" + MustBe.getShortErrorMessage(t)); } } } } String2.log("\n time to read all those files = " + Calendar2.elapsedTimeString(System.currentTimeMillis() - fileReadTime)); //end of region loop String2.log("\nIn zip=" + sourceZipName + "\n nExceptions= " + nExceptions + " nWarnings=" + nWarnings + "\n nBadStation= " + nBadStation + " nGoodStation=" + nGoodStation + "\n nBadPos= " + nBadPos + " nGoodPos=" + nGoodPos + "\n nBadTime= " + nBadTime + " nGoodTime=" + nGoodTime + "\n nBadDepth= " + nBadDepth + " nGoodDepth=" + nGoodDepth + "\n nBadTemperature=" + nBadTemperature + " nGoodTemperature=" + nGoodTemperature + "\n nBadSalinity= " + nBadSalinity + " nGoodSalinity=" + nGoodSalinity); totalNGoodStation += nGoodStation; totalNGoodPos += nGoodPos; totalNGoodTime += nGoodTime; totalNGoodDepth += nGoodDepth; totalNGoodTemperature += nGoodTemperature; totalNGoodSalinity += nGoodSalinity; totalNGoodRows += nGoodRows; totalNBadPos += nBadPos; totalNBadTime += nBadTime; totalNBadDepth += nBadDepth; totalNBadTemperature += nBadTemperature; totalNBadSalinity += nBadSalinity; totalNBadRows += nBadRows; totalNWarnings += nWarnings; totalNExceptions += nExceptions; } //end of region loop //save by outTableName boolean filePrinted = false; Object keys[] = tableHashMap.keySet().toArray(); int nKeys = keys.length; String2.log("\n*** saving nFiles=" + nKeys); for (int keyi = 0; keyi < nKeys; keyi++) { String key = keys[keyi].toString(); Table tTable = (Table) tableHashMap.remove(key); if (tTable == null || tTable.nRows() == 0) { String2.log("Unexpected: no table for key=" + key); continue; } //sort by time, station, depth //depth matches the source files: from surface to deepest tTable.sort(new int[] { timeCol, stationCol, depthCol }, new boolean[] { true, true, true }); //is this saving a small lat lon range? double stationStats[] = tTable.getColumn(stationCol).calculateStats(); //double lonStats[] = tTable.getColumn(longitudeCol).calculateStats(); //double latStats[] = tTable.getColumn(latitudeCol).calculateStats(); //nLats++; //double latRange = latStats[PrimitiveArray.STATS_MAX] - latStats[PrimitiveArray.STATS_MIN]; //latSum += latRange; rowsSum += tTable.nRows(); String2.log(" stationRange=" + Math2.roundToInt( stationStats[PrimitiveArray.STATS_MAX] - stationStats[PrimitiveArray.STATS_MIN]) + //" lonRange=" + Math2.roundToInt(lonStats[ PrimitiveArray.STATS_MAX] - lonStats[ PrimitiveArray.STATS_MIN]) + //" latRange=" + Math2.roundToInt(latRange) + " nRows=" + tTable.nRows()); //save it String tName = tDestDir + String2.encodeFileNameSafe(key); /*if (lonStats[PrimitiveArray.STATS_MAX] > 45 && lonStats[PrimitiveArray.STATS_MIN] < -45) { //NO MORE: This happened with 1 file/cruise, // but won't happen now with lon/lat tiles. //crosses dateline (or widely across lon=0)? 
        //  split into 2 files
        Table ttTable = (Table)tTable.clone();
        ttTable.oneStepApplyConstraint(0, "longitude", "<", "0");
        ttTable.saveAsFlatNc(tName + "_W.nc", "row", false);
        double lonStatsW[] = ttTable.getColumn(longitudeCol).calculateStats();
        nLons++;
        double lonRangeW = lonStatsW[PrimitiveArray.STATS_MAX] - lonStatsW[PrimitiveArray.STATS_MIN];
        lonSum += lonRangeW;

        ttTable = (Table)tTable.clone();
        ttTable.oneStepApplyConstraint(0, "longitude", ">=", "0");
        ttTable.saveAsFlatNc(tName + "_E.nc", "row", false);
        double lonStatsE[] = ttTable.getColumn(longitudeCol).calculateStats();
        nLons++;
        double lonRangeE = lonStatsE[PrimitiveArray.STATS_MAX] - lonStatsE[PrimitiveArray.STATS_MIN];
        lonSum += lonRangeE;
        String2.log(" westLonRange=" + Math2.roundToInt(lonRangeW) +
            " eastLonRange=" + Math2.roundToInt(lonRangeE));
    } else */
    {
        //nLons++;
        nFiles++;

        //create trajectory variable: platform + cruise
        StringArray pl = (StringArray) tTable.getColumn("platform");
        StringArray cr = (StringArray) tTable.getColumn("cruise");
        StringArray or = (StringArray) tTable.getColumn("org");
        StringArray ty = (StringArray) tTable.getColumn("type");
        StringArray tr = new StringArray();
        int n = pl.size();
        for (int i = 0; i < n; i++) {
            pl.set(i, String2.whitespacesToSpace(pl.get(i)));
            cr.set(i, String2.whitespacesToSpace(cr.get(i)));
            or.set(i, String2.whitespacesToSpace(or.get(i)));
            ty.set(i, String2.whitespacesToSpace(ty.get(i)));
            tr.add(or.getString(i) + "_" + ty.getString(i) + "_" +
                pl.getString(i) + "_" + cr.getString(i));
        }
        tTable.addColumn(0, "trajectory", tr, new Attributes());

        tTable.saveAsFlatNc(tName + ".nc", "row", false); //convertToFakeMissingValues (keep mv's as NaNs)
    }

    //print a file
    if (testMode && !filePrinted) {
        filePrinted = true;
        String2.log(NcHelper.dumpString(tName, true));
    }
}
String2.log("\ncumulative nProfiles=" + profilesSum + " nRows=" + rowsSum +
    " mean nRows/file=" + (rowsSum / Math.max(1, nFiles)));
//if (nLats > 0)
//    String2.log("cumulative nLats=" + nLats + " meanLatRange=" + (float)(latSum / nLats));
//if (nLons > 0) {
//    String2.log("cumulative nLons=" + nLons + " meanLonRange=" + (float)(lonSum / nLons));
//    String2.log("mean nRows per saved file = " + (rowsSum / nLons));
//}

//print list of impossible at end of year or end of run
if (month == 12 || (year == lastYear && month == lastMonth)) {

    String2.log("\n*** " + Calendar2.getCurrentISODateTimeStringLocalTZ() +
        " bobConsolidateGtsppTgz finished the chunk ending " + year + "-" + month + "\n" +
        "chunkTime=" + Calendar2.elapsedTimeString(System.currentTimeMillis() - chunkTime));
    chunkTime = System.currentTimeMillis();

    //print impossible statistics
    String2.log("\nCumulative number of stations with:\n" +
        "impossibleNanLon = " + impossibleNanLon.size() + "\n" +
        "impossibleMinLon = " + impossibleMinLon.size() + "\n" +
        "impossibleMaxLon = " + impossibleMaxLon.size() + "\n" +
        "impossibleNanLat = " + impossibleNanLat.size() + "\n" +
        "impossibleMinLat = " + impossibleMinLat.size() + "\n" +
        "impossibleMaxLat = " + impossibleMaxLat.size() + "\n" +
        "impossibleMinDepth = " + impossibleMinDepth.size() + "\n" +
        "impossibleMaxDepth = " + impossibleMaxDepth.size() + "\n" +
        //"impossibleLatLon = " + impossibleLatLon.size() + "\n" +
        "impossibleMinTemperature = " + impossibleMinTemperature.size() + "\n" +
        "impossibleMaxTemperature = " + impossibleMaxTemperature.size() + "\n" +
        "impossibleMinSalinity = " + impossibleMinSalinity.size() + "\n" +
        "impossibleMaxSalinity = " + impossibleMaxSalinity.size() + "\n");

    //lon
    String2.log("\n*** " + impossibleNanLon.size() +
        " stations had invalid lon" +
        " and good pos quality flags (" + okQFCsv + ").");
    impossibleNanLon.sortIgnoreCase();
    String2.log(impossibleNanLon.toNewlineString());

    String2.log("\n*** " + impossibleMinLon.size() +
        " stations had lon<" + minLon +
        " and good pos quality flags (" + okQFCsv + ").");
    impossibleMinLon.sortIgnoreCase();
    String2.log(impossibleMinLon.toNewlineString());

    String2.log("\n*** " + impossibleMaxLon.size() +
        " stations had lon>" + maxLon +
        " and good pos quality flags (" + okQFCsv + ").");
    impossibleMaxLon.sortIgnoreCase();
    String2.log(impossibleMaxLon.toNewlineString());

    //lat
    String2.log("\n*** " + impossibleNanLat.size() +
        " stations had invalid lat" +
        " and good pos quality flags (" + okQFCsv + ").");
    impossibleNanLat.sortIgnoreCase();
    String2.log(impossibleNanLat.toNewlineString());

    String2.log("\n*** " + impossibleMinLat.size() +
        " stations had lat<" + minLat +
        " and good pos quality flags (" + okQFCsv + ").");
    impossibleMinLat.sortIgnoreCase();
    String2.log(impossibleMinLat.toNewlineString());

    String2.log("\n*** " + impossibleMaxLat.size() +
        " stations had lat>" + maxLat +
        " and good pos quality flags (" + okQFCsv + ").");
    impossibleMaxLat.sortIgnoreCase();
    String2.log(impossibleMaxLat.toNewlineString());

    //depth
    String2.log("\n*** " + impossibleMinDepth.size() +
        " stations had depth<" + minDepth +
        " and good depth quality flags (" + okQFCsv + ").");
    impossibleMinDepth.sortIgnoreCase();
    String2.log(impossibleMinDepth.toNewlineString());

    String2.log("\n*** " + impossibleMaxDepth.size() +
        " stations had depth>" + maxDepth +
        " and good depth quality flags (" + okQFCsv + ").");
    impossibleMaxDepth.sortIgnoreCase();
    String2.log(impossibleMaxDepth.toNewlineString());

    //sa = impossibleLatLon.toArray();
    //Arrays.sort(sa);
    //String2.log("\n*** " + sa.length + " stations had impossible latitude or longitude values" +
    //    " and good q_pos quality flags.");
    //String2.log(String2.toNewlineString(sa));

    String2.log("\n*** " + impossibleMinTemperature.size() +
        " stations had temperature<" + minTemperature +
        " and good temperature quality flags (" + okQFCsv + ").");
    impossibleMinTemperature.sortIgnoreCase();
    String2.log(impossibleMinTemperature.toNewlineString());

    String2.log("\n*** " + impossibleMaxTemperature.size() +
        " stations had temperature>" + maxTemperature +
        " and good temperature quality flags (" + okQFCsv + ").");
    impossibleMaxTemperature.sortIgnoreCase();
    String2.log(impossibleMaxTemperature.toNewlineString());

    String2.log("\n*** " + impossibleMinSalinity.size() +
        " stations had salinity<" + minSalinity +
        " and good salinity quality flags (" + okQFCsv + ").");
    impossibleMinSalinity.sortIgnoreCase();
    String2.log(impossibleMinSalinity.toNewlineString());

    String2.log("\n*** " + impossibleMaxSalinity.size() +
        " stations had salinity>" + maxSalinity +
        " and good salinity quality flags (" + okQFCsv + ").");
    impossibleMaxSalinity.sortIgnoreCase();
    String2.log(impossibleMaxSalinity.toNewlineString());
}

//are we done?
if (year == lastYear && month == lastMonth)
    break;

//increment the month
month++;
if (month == 13) {
    year++;
    month = 1;
}
} //end of month/year loop

String2.log("\n*** bobConsolidateGtspp completely finished " +
    firstYear + "-" + firstMonth + " through " + lastYear + "-" + lastMonth);
String2.log("\n***" +
    "\ntotalNExceptions= " + totalNExceptions + " totalNWarnings= " + totalNWarnings +
    "\ntotalNBadStation= " + totalNBadStation + " totalNGoodStation= " + totalNGoodStation +
    "\ntotalNBadPos= " + totalNBadPos + " totalNGoodPos= " + totalNGoodPos +
    "\ntotalNBadTime= " + totalNBadTime + " totalNGoodTime= " + totalNGoodTime +
    "\ntotalNBadDepth= " + totalNBadDepth + " totalNGoodDepth= " + totalNGoodDepth +
    "\ntotalNBadTemperature=" + totalNBadTemperature + " totalNGoodTemperature=" + totalNGoodTemperature +
    "\ntotalNBadSalinity= " + totalNBadSalinity + " totalNGoodSalinity= " + totalNGoodSalinity +
    "\ntotalNBadRows= " + totalNBadRows + " totalNGoodRows= " + totalNGoodRows +
    "\nlogFile=F:/data/gtspp/log.txt" +
    "\n\n*** all finished time=" +
    Calendar2.elapsedTimeString(System.currentTimeMillis() - elapsedTime));
String2.returnLoggingToSystemOut();
}
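The BitSet.cardinality() call in this example is easy to miss inside such a long method, so here is a minimal, self-contained sketch of the same mask-then-compact idiom. Note that PrimitiveArray.justKeep(keep) is ERDDAP-specific, so this sketch substitutes a hypothetical justKeep helper over a plain float[], and the depth bounds are illustrative values, not GTSPP's actual limits.

import java.util.BitSet;

public class KeepFilterSketch {

    /** Returns a new array containing only the rows whose bit is set in keep. */
    static float[] justKeep(float[] column, BitSet keep) {
        //cardinality() = number of set bits = number of rows that survive
        float[] result = new float[keep.cardinality()];
        int next = 0;
        //iterate only over the set bits
        for (int row = keep.nextSetBit(0); row >= 0; row = keep.nextSetBit(row + 1))
            result[next++] = column[row];
        return result;
    }

    public static void main(String[] args) {
        float[] depth = { 0f, 5f, -99999f, 10f, 12000f }; //illustrative data
        int nRows = depth.length;

        BitSet keep = new BitSet();
        keep.set(0, nRows); //start with all rows marked good

        for (int row = 0; row < nRows; row++)
            if (depth[row] < 0 || depth[row] > 11000) //"impossible" depth (illustrative bounds)
                keep.clear(row);

        int nGood = keep.cardinality();
        System.out.println("nGoodRows=" + nGood + " nBadRows=" + (nRows - nGood));
        depth = justKeep(depth, keep); //prints nGoodRows=3 nBadRows=2, then keeps 3 rows
        System.out.println("kept " + depth.length + " rows");
    }
}

The design choice worth copying: because the mask starts with every bit set and bad rows only clear bits, cardinality() directly gives the surviving row count for logging and for sizing the result, and the same BitSet can compact every parallel column (depth, temperature, salinity) consistently in one pass each.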