Example usage for java.util BitSet cardinality

List of usage examples for java.util BitSet cardinality

Introduction

On this page you can find example usages of the java.util.BitSet method cardinality().

Prototype

public int cardinality() 

Source Link

Document

Returns the number of bits set to true in this BitSet.

Usage

From source file:uk.ac.ebi.orchem.search.SimilaritySearch.java

/**
 * Performs a similarity search between a query molecule and the orchem fingerprint table.
 *
 * @param queryFp fingerprint of the query molecule
 * @param _cutOff tanimoto score below which to stop searching
 * @param _topN top N results after which to stop searching
 * @param debugYN Y or N to debug output back
 * @param idsOnlyYN Y or N to indicate to just return IDs of results (faster)
 * @param extraWhereClause option to include an extra SQL where clause refering to the base compound table
 * @return array of {@link uk.ac.ebi.orchem.bean.OrChemCompound compounds}
 * @throws Exception/*from ww w  .  j a va 2 s.  c om*/
 */
private static oracle.sql.ARRAY search(BitSet queryFp, Float _cutOff, Integer _topN, String debugYN,
        String idsOnlyYN, String extraWhereClause) throws Exception {

    /*
     * 
    The comment block below describes the search algorithm.
    From:
     "Bounds and Algorithms for Fast Exact Searches of Chemical Fingerprints in Linear and Sub-Linear Time"
      S.Joshua Swamidass and Pierre Baldi
      http://dx.doi.org/10.1021/ci600358f
            
     Top K Hits
     ----------
     We can search for the top K hits by starting from the maximum (where A=B), and exploring discrete possible
     values of B right and left of the maximum.
            
     More precisely, for binary fingerprints, we first
     index the molecules in the database by their fingerprint "bit count"
     to enable efficient referencing
     of a particular bit count bin.
            
     Next, with respect to a particular query, we calculate the bound
     on the similarity for every bit count in the database.
            
     Then we sort these bit counts by their associated bound and iterate over the
     molecules in the database, in order of decreasing bound.
            
     As we iterate, we calculate the similarity between the query and the database molecule and use
     a heap to efficiently track the top hits. The algorithm terminates when
     "the lowest similarity value in the heap is greater than the bound associated with the current database bin"
            
     Algorithm 1 Top K Search
     Require: database of fingerprints binned by bit count Bs
     Ensure: hits contains top K hits which satisfy SIMILARITY( ) > T
            
     1:  hits <- MINHEAP()
     2:  bounds <- LIST()
     3:  for all B in database do //iterate over bins
     4:    tuple <- TUPLE(BOUND(A,B),B)
     5:    LISTAPPEND(bounds, tuple)
     6:  end for
     7:  QUICKSORT(bounds) //NOTE: the length of bounds is constant
     8:  for all bound, B in bounds do //iterate in order of decreasing bound
     9:    if bound < T then
     10:      break //threshold stopping condition
     11:   end if
     12:   if K  HEAPSIZE(hits) and bound < MINSIMILARITY(hits) then
     13:     break //top-K stopping condition
     14:   end if
     15:   for all in database[B] do
     16:     S=SIMILARITY( )
     17:     tuple <- TUPLE(S, )
     18:     if S  T then
     19:        continue //ignore this and continue to next
     20:     else if LENGTH(hits)< K then
     21:        HEAPPUSH(hits, tuple)
     22:     else if S > MINSIMILARITY(hits) then
     23:       HEAPPOPMIN(hits)
     24:       HEAPPUSH(hits,tuple)
     25:     end if
     26:   end for
     27: end for
     28: return hits
     */

    boolean debugging = false;
    if (debugYN.toLowerCase().equals("y"))
        debugging = true;

    debug("started", debugging);

    /**********************************************************************
     * Similarity search algorithm section                                *
     *                                                                    *
     **********************************************************************/
    Comparator heapComparator = new SimHeapElementTanimComparator();
    PriorityBuffer heap = null;
    OracleConnection conn = null;
    PreparedStatement pstmtFp = null;
    PreparedStatement pstmLookup = null;

    String query = " select bit_count, id, fp from orchem_fingprint_simsearch s where  bit_count = ? ";

    float cutOff = _cutOff.floatValue();
    int topN = -1;
    if (_topN == null) {
        debug("No topN breakout specified.. searching until lower bound reached", debugging);
    } else {
        topN = _topN.intValue();
        debug("topN is " + topN + ", result set size limited.", debugging);
    }

    try {
        conn = (OracleConnection) new OracleDriver().defaultConnection();

        String compoundTableName = OrChemParameters.getParameterValue(OrChemParameters.COMPOUND_TABLE, conn);
        String compoundTablePkColumn = OrChemParameters.getParameterValue(OrChemParameters.COMPOUND_PK, conn);
        String compoundTableMolfileColumn = OrChemParameters.getParameterValue(OrChemParameters.COMPOUND_MOL,
                conn);

        if (extraWhereClause != null) {
            query = " select s.bit_count, s.id, s.fp from " + " orchem_fingprint_simsearch s , "
                    + compoundTableName + " c " + " where  s.bit_count = ? " + " and s.id = c."
                    + compoundTablePkColumn + " " + " and " + extraWhereClause;
            debug("QUERY is " + query, debugging);
        }

        float queryBitCount = queryFp.cardinality();
        byte[] queryBytes = Utils.toByteArray(queryFp, extFpSize);
        int queryByteArrLen = queryBytes.length;

        float lowBucketNum = queryBitCount - 1;
        float highBucketNum = queryBitCount + 1;
        float currBucketNum = queryBitCount;

        pstmtFp = conn.prepareStatement(query);
        pstmtFp.setFetchSize(250);

        ResultSet resFp = null;
        boolean done = false;
        byte[] dbByteArray = null;
        float tanimotoCoeff = 0f;
        heap = new PriorityBuffer(true, heapComparator);
        int bucksSearched = 0;
        int loopCount = 0;

        while (!done) {
            debug("bucket is " + currBucketNum, debugging);
            loopCount++;
            pstmtFp.setFloat(1, currBucketNum);
            bucksSearched++;
            resFp = pstmtFp.executeQuery();

            float bound = 0f;
            if (currBucketNum < queryBitCount)
                bound = currBucketNum / queryBitCount;
            else
                bound = queryBitCount / currBucketNum;

            /* Algorithm step 9..11
               Here we can break out because the tanimoto score is becoming to low */
            if (bound < cutOff) {
                debug("bound < cutOff, done", debugging);
                done = true;
            }

            if (!done) {
                //Algorithm 15-26
                while (resFp.next()) {
                    dbByteArray = resFp.getBytes("fp");
                    tanimotoCoeff = calcTanimoto(queryBytes, queryByteArrLen, dbByteArray, queryBitCount,
                            currBucketNum);

                    if (tanimotoCoeff >= cutOff) {
                        SimHeapElement elm = new SimHeapElement();
                        elm.setID(resFp.getString("id"));
                        elm.setTanimotoCoeff(new Float(tanimotoCoeff));

                        if (heap.size() < topN || topN == -1) {
                            heap.add(elm);
                            debug("add elem " + elm.getID(), debugging);

                        } else if (tanimotoCoeff > ((SimHeapElement) (heap.get())).getTanimotoCoeff()
                                .floatValue()) {
                            heap.remove();
                            heap.add(elm);
                            debug("remove + add elem " + elm.getID(), debugging);
                        }
                    }
                }
                resFp.close();
                /* Algorithm 12-14:
                 * When top N hits is reached, and the lowest score of the
                 * hits is greater than the current bucket bound, stop.
                 * If not, the next bucket may contain a better score, so go on.
                 */

                if (topN != -1 && heap.size() >= topN
                        && ((SimHeapElement) (heap.get())).getTanimotoCoeff().floatValue() > bound) {
                    done = true;
                    debug("topN reached, done", debugging);

                } else {
                    // calculate new currBucket
                    float up = queryBitCount / highBucketNum;
                    float down = lowBucketNum / queryBitCount;

                    if (up > down) {
                        currBucketNum = highBucketNum;
                        highBucketNum++;
                    } else {
                        currBucketNum = lowBucketNum;
                        lowBucketNum--;
                    }

                    if (lowBucketNum < 1 && highBucketNum > extFpSize)
                        done = true;
                }
            }
        }
        debug("searched bit_count buckets: " + loopCount, debugging);

        /********************************************************************
         * Search completed.                                                *
         *                                                                  *
         * Next section is just looking up the compounds by ID and          *
         * returning the results, sorted by Tanimoto coefficient            *
         *                                                                  *
         *******************************************************************/
        String lookupCompoundQuery = " select " + compoundTableMolfileColumn + " from " + " "
                + compoundTableName + " where " + " " + compoundTablePkColumn + " =?";

        pstmLookup = conn.prepareStatement(lookupCompoundQuery);
        List compounds = new ArrayList();

        while (heap.size() != 0) {
            SimHeapElement bElm = (SimHeapElement) heap.remove();

            if (idsOnlyYN.equals("N")) {
                // return structure to user
                pstmLookup.setString(1, bElm.getID());
                ResultSet resLookup = pstmLookup.executeQuery();
                if (resLookup.next()) {
                    OrChemCompound c = new OrChemCompound();
                    c.setId(bElm.getID());
                    c.setScore(bElm.getTanimotoCoeff().floatValue());
                    c.setMolFileClob(resLookup.getClob(compoundTableMolfileColumn));
                    compounds.add(c);
                }
                resLookup.close();
            } else {
                // only return ID and score to user
                OrChemCompound c = new OrChemCompound();
                c.setId(bElm.getID());
                c.setScore(bElm.getTanimotoCoeff().floatValue());
                compounds.add(c);
            }
        }
        pstmLookup.close();
        long befSort = System.currentTimeMillis();
        Collections.sort(compounds, new OrChemCompoundTanimComparator());
        debug("sorting time (ms) " + (System.currentTimeMillis() - befSort), debugging);

        OrChemCompound[] output = new OrChemCompound[compounds.size()];
        for (int i = 0; i < compounds.size(); i++) {
            output[i] = (OrChemCompound) (compounds.get(i));
        }
        ArrayDescriptor arrayDescriptor = ArrayDescriptor.createDescriptor("ORCHEM_COMPOUND_LIST", conn);
        debug("#compounds in result list : " + compounds.size(), debugging);
        debug("ended", debugging);
        return new ARRAY(arrayDescriptor, conn, output);
    } catch (Exception ex) {
        ex.printStackTrace();
        throw (ex);
    } finally {
        if (pstmLookup != null)
            pstmLookup.close();
        if (pstmtFp != null)
            pstmtFp.close();
        if (conn != null)
            conn.close();
    }
}

From source file:org.apache.openjpa.kernel.BrokerImpl.java

/**
 * Reports whether all of the given object ids are cached.
 * <p>
 * The L1 cache is probed first. If it holds every id the answer is {@code true} right away;
 * otherwise the decision is delegated to the store, which also receives a bitmap marking the
 * positions of the ids that were found in the L1 cache.
 *
 * @param oids the object ids to check
 * @return {@code true} if every id was found in the L1 cache, otherwise whatever the store reports
 */
public boolean isCached(List<Object> oids) {
    final int total = oids.size();
    final BitSet foundInL1 = new BitSet(total);
    // check L1 cache first, flagging the position of every id it already holds
    for (int pos = 0; pos < total; pos++) {
        if (_cache.getById(oids.get(pos), false) == null) {
            continue;
        }
        foundInL1.set(pos);
    }
    // Every position flagged means nothing is left for the store to answer.
    return foundInL1.cardinality() == total || _store.isCached(oids, foundInL1);
}

From source file:edu.uci.ics.asterix.optimizer.rules.am.BTreeAccessMethod.java

/**
 * Builds the operators that answer the matched predicates through a B-tree search on
 * {@code chosenIndex}.
 * <p>
 * The method first folds the matched function expressions into a low/high key range per index key
 * field, then creates the secondary-index search operator and, on top of it, one of: an external-data
 * lookup (external datasets), a primary-index lookup (secondary index), or an unnest-map that reuses
 * the search expression (primary index). In the first two cases the data-source reference of
 * {@code indexSubTree} is pointed at the newly created operator.
 *
 * @param topOpRef not referenced in this method body
 * @param conditionRef select condition; rewritten (or set to null) only in the primary-index branch
 *        when no post-processing of the index results is needed
 * @param indexSubTree subtree providing the dataset, record type and data-source scan
 * @param probeSubTree subtree whose root feeds the index search when no constant key assignment is created
 * @param chosenIndex index to search
 * @param analysisCtx supplies the matched function expressions and the number of matched keys for the index
 * @param retainInput forwarded to the job-generation parameters and to the created operators
 * @param retainNull forwarded to the job-generation parameters and to the created operators
 * @param requiresBroadcast forwarded to the job-generation parameters
 * @param context optimization context handed to the helper methods
 * @return the operator on top of the generated plan, or {@code null} if the function expressions
 *         could not be combined into one range predicate
 * @throws AlgebricksException if a function expression cannot be matched to an index field name, or
 *         if collecting the primary-index output types fails with an {@code IOException}
 */
private ILogicalOperator createSecondaryToPrimaryPlan(Mutable<ILogicalOperator> topOpRef,
        Mutable<ILogicalExpression> conditionRef, OptimizableOperatorSubTree indexSubTree,
        OptimizableOperatorSubTree probeSubTree, Index chosenIndex, AccessMethodAnalysisContext analysisCtx,
        boolean retainInput, boolean retainNull, boolean requiresBroadcast, IOptimizationContext context)
        throws AlgebricksException {
    Dataset dataset = indexSubTree.dataset;
    ARecordType recordType = indexSubTree.recordType;
    // we made sure indexSubTree has datasource scan
    AbstractDataSourceOperator dataSourceOp = (AbstractDataSourceOperator) indexSubTree.dataSourceRef
            .getValue();
    List<Pair<Integer, Integer>> exprAndVarList = analysisCtx.indexExprsAndVars.get(chosenIndex);
    List<IOptimizableFuncExpr> matchedFuncExprs = analysisCtx.matchedFuncExprs;
    int numSecondaryKeys = analysisCtx.indexNumMatchedKeys.get(chosenIndex);
    // List of function expressions that will be replaced by the secondary-index search.
    // These func exprs will be removed from the select condition at the very end of this method.
    Set<ILogicalExpression> replacedFuncExprs = new HashSet<ILogicalExpression>();

    // Info on high and low keys for the BTree search predicate.
    ILogicalExpression[] lowKeyExprs = new ILogicalExpression[numSecondaryKeys];
    ILogicalExpression[] highKeyExprs = new ILogicalExpression[numSecondaryKeys];
    LimitType[] lowKeyLimits = new LimitType[numSecondaryKeys];
    LimitType[] highKeyLimits = new LimitType[numSecondaryKeys];
    boolean[] lowKeyInclusive = new boolean[numSecondaryKeys];
    boolean[] highKeyInclusive = new boolean[numSecondaryKeys];

    // TODO: For now we don't do any sophisticated analysis of the func exprs to come up with "the best" range predicate.
    // If we can't figure out how to integrate a certain funcExpr into the current predicate, we just bail by setting this flag.
    boolean couldntFigureOut = false;
    boolean doneWithExprs = false;
    boolean isEqCondition = false;
    // TODO: For now don't consider prefix searches.
    // Bit i is set once key field i has been bound by an equality predicate (only the EQUAL case sets bits).
    BitSet setLowKeys = new BitSet(numSecondaryKeys);
    BitSet setHighKeys = new BitSet(numSecondaryKeys);
    // Go through the func exprs listed as optimizable by the chosen index,
    // and formulate a range predicate on the secondary-index keys.

    // checks whether a type casting happened from a real (FLOAT, DOUBLE) value to an INT value
    // since we have a round issues when dealing with LT(<) OR GT(>) operator.
    boolean realTypeConvertedToIntegerType = false;

    for (Pair<Integer, Integer> exprIndex : exprAndVarList) {
        // Position of the field of matchedFuncExprs.get(exprIndex) in the chosen index's indexed exprs.
        IOptimizableFuncExpr optFuncExpr = matchedFuncExprs.get(exprIndex.first);
        int keyPos = indexOf(optFuncExpr.getFieldName(0), chosenIndex.getKeyFieldNames());
        if (keyPos < 0) {
            if (optFuncExpr.getNumLogicalVars() > 1) {
                // If we are optimizing a join, the matching field may be the second field name.
                keyPos = indexOf(optFuncExpr.getFieldName(1), chosenIndex.getKeyFieldNames());
            }
        }
        if (keyPos < 0) {
            throw new AlgebricksException(
                    "Could not match optimizable function expression to any index field name.");
        }
        Pair<ILogicalExpression, Boolean> returnedSearchKeyExpr = AccessMethodUtils
                .createSearchKeyExpr(optFuncExpr, indexSubTree, probeSubTree);
        ILogicalExpression searchKeyExpr = returnedSearchKeyExpr.first;
        realTypeConvertedToIntegerType = returnedSearchKeyExpr.second;

        LimitType limit = getLimitType(optFuncExpr, probeSubTree);

        // If a DOUBLE or FLOAT constant is converted to an INT type value,
        // we need to check a corner case where two real values are located between an INT value.
        // For example, for the following query,
        //
        // for $emp in dataset empDataset
        // where $emp.age > double("2.3") and $emp.age < double("3.3")
        // return $emp.id;
        //
        // It should generate a result if there is a tuple that satisfies the condition, which is 3,
        // however, it does not generate the desired result since finding candidates
        // fail after truncating the fraction part (there is no INT whose value is greater than 2 and less than 3.)
        //
        // Therefore, we convert LT(<) to LE(<=) and GT(>) to GE(>=) to find candidates.
        // This does not change the result of an actual comparison since this conversion is only applied
        // for finding candidates from an index.
        //
        if (realTypeConvertedToIntegerType) {
            if (limit == LimitType.HIGH_EXCLUSIVE) {
                limit = LimitType.HIGH_INCLUSIVE;
            } else if (limit == LimitType.LOW_EXCLUSIVE) {
                limit = LimitType.LOW_INCLUSIVE;
            }
        }

        switch (limit) {
        case EQUAL: {
            if (lowKeyLimits[keyPos] == null && highKeyLimits[keyPos] == null) {
                lowKeyLimits[keyPos] = highKeyLimits[keyPos] = limit;
                lowKeyInclusive[keyPos] = highKeyInclusive[keyPos] = true;
                lowKeyExprs[keyPos] = highKeyExprs[keyPos] = searchKeyExpr;
                setLowKeys.set(keyPos);
                setHighKeys.set(keyPos);
                isEqCondition = true;
            } else {
                // Has already been set to the identical values. When optimizing join we may encounter the same optimizable expression twice
                // (once from analyzing each side of the join)
                if (lowKeyLimits[keyPos] == limit && lowKeyInclusive[keyPos] == true
                        && lowKeyExprs[keyPos].equals(searchKeyExpr) && highKeyLimits[keyPos] == limit
                        && highKeyInclusive[keyPos] == true && highKeyExprs[keyPos].equals(searchKeyExpr)) {
                    isEqCondition = true;
                    break;
                }
                couldntFigureOut = true;
            }
            // TODO: For now don't consider prefix searches.
            // If high and low keys are set, we exit for now.
            if (setLowKeys.cardinality() == numSecondaryKeys && setHighKeys.cardinality() == numSecondaryKeys) {
                doneWithExprs = true;
            }
            break;
        }
        case HIGH_EXCLUSIVE: {
            // An exclusive bound may replace a previously recorded inclusive one.
            if (highKeyLimits[keyPos] == null || (highKeyLimits[keyPos] != null && highKeyInclusive[keyPos])) {
                highKeyLimits[keyPos] = limit;
                highKeyExprs[keyPos] = searchKeyExpr;
                highKeyInclusive[keyPos] = false;
            } else {
                // Has already been set to the identical values. When optimizing join we may encounter the same optimizable expression twice
                // (once from analyzing each side of the join)
                if (highKeyLimits[keyPos] == limit && highKeyInclusive[keyPos] == false
                        && highKeyExprs[keyPos].equals(searchKeyExpr)) {
                    break;
                }
                couldntFigureOut = true;
                doneWithExprs = true;
            }
            break;
        }
        case HIGH_INCLUSIVE: {
            if (highKeyLimits[keyPos] == null) {
                highKeyLimits[keyPos] = limit;
                highKeyExprs[keyPos] = searchKeyExpr;
                highKeyInclusive[keyPos] = true;
            } else {
                // Has already been set to the identical values. When optimizing join we may encounter the same optimizable expression twice
                // (once from analyzing each side of the join)
                if (highKeyLimits[keyPos] == limit && highKeyInclusive[keyPos] == true
                        && highKeyExprs[keyPos].equals(searchKeyExpr)) {
                    break;
                }
                couldntFigureOut = true;
                doneWithExprs = true;
            }
            break;
        }
        case LOW_EXCLUSIVE: {
            // An exclusive bound may replace a previously recorded inclusive one.
            if (lowKeyLimits[keyPos] == null || (lowKeyLimits[keyPos] != null && lowKeyInclusive[keyPos])) {
                lowKeyLimits[keyPos] = limit;
                lowKeyExprs[keyPos] = searchKeyExpr;
                lowKeyInclusive[keyPos] = false;
            } else {
                // Has already been set to the identical values. When optimizing join we may encounter the same optimizable expression twice
                // (once from analyzing each side of the join)
                if (lowKeyLimits[keyPos] == limit && lowKeyInclusive[keyPos] == false
                        && lowKeyExprs[keyPos].equals(searchKeyExpr)) {
                    break;
                }
                couldntFigureOut = true;
                doneWithExprs = true;
            }
            break;
        }
        case LOW_INCLUSIVE: {
            if (lowKeyLimits[keyPos] == null) {
                lowKeyLimits[keyPos] = limit;
                lowKeyExprs[keyPos] = searchKeyExpr;
                lowKeyInclusive[keyPos] = true;
            } else {
                // Has already been set to the identical values. When optimizing join we may encounter the same optimizable expression twice
                // (once from analyzing each side of the join)
                if (lowKeyLimits[keyPos] == limit && lowKeyInclusive[keyPos] == true
                        && lowKeyExprs[keyPos].equals(searchKeyExpr)) {
                    break;
                }
                couldntFigureOut = true;
                doneWithExprs = true;
            }
            break;
        }
        default: {
            throw new IllegalStateException();
        }
        }
        if (!couldntFigureOut) {
            // Remember to remove this funcExpr later.
            replacedFuncExprs.add(matchedFuncExprs.get(exprIndex.first).getFuncExpr());
        }
        if (doneWithExprs) {
            break;
        }
    }
    // Bail out: the caller receives null when the predicates could not be merged into one range.
    if (couldntFigureOut) {
        return null;
    }

    // If the select condition contains mixed open/closed intervals on multiple keys, then we make all intervals closed to obtain a superset of answers and leave the original selection in place.
    boolean primaryIndexPostProccessingIsNeeded = false;
    for (int i = 1; i < numSecondaryKeys; ++i) {
        if (lowKeyInclusive[i] != lowKeyInclusive[0]) {
            Arrays.fill(lowKeyInclusive, true);
            primaryIndexPostProccessingIsNeeded = true;
            break;
        }
    }
    for (int i = 1; i < numSecondaryKeys; ++i) {
        if (highKeyInclusive[i] != highKeyInclusive[0]) {
            Arrays.fill(highKeyInclusive, true);
            primaryIndexPostProccessingIsNeeded = true;
            break;
        }
    }

    // determine cases when prefix search could be applied
    // Each key whose low or high limit is present/absent differently from key 0 reduces the number
    // of keys used for the search by one; the original selection must then stay in place.
    for (int i = 1; i < lowKeyExprs.length; i++) {
        if (lowKeyLimits[0] == null && lowKeyLimits[i] != null
                || lowKeyLimits[0] != null && lowKeyLimits[i] == null
                || highKeyLimits[0] == null && highKeyLimits[i] != null
                || highKeyLimits[0] != null && highKeyLimits[i] == null) {
            numSecondaryKeys--;
            primaryIndexPostProccessingIsNeeded = true;
        }
    }
    // A side without any limit on the first key is flagged as inclusive.
    if (lowKeyLimits[0] == null) {
        lowKeyInclusive[0] = true;
    }
    if (highKeyLimits[0] == null) {
        highKeyInclusive[0] = true;
    }

    // Here we generate vars and funcs for assigning the secondary-index keys to be fed into the secondary-index search.
    // List of variables for the assign.
    ArrayList<LogicalVariable> keyVarList = new ArrayList<LogicalVariable>();
    // List of variables and expressions for the assign.
    ArrayList<LogicalVariable> assignKeyVarList = new ArrayList<LogicalVariable>();
    ArrayList<Mutable<ILogicalExpression>> assignKeyExprList = new ArrayList<Mutable<ILogicalExpression>>();
    int numLowKeys = createKeyVarsAndExprs(numSecondaryKeys, lowKeyLimits, lowKeyExprs, assignKeyVarList,
            assignKeyExprList, keyVarList, context);
    int numHighKeys = createKeyVarsAndExprs(numSecondaryKeys, highKeyLimits, highKeyExprs, assignKeyVarList,
            assignKeyExprList, keyVarList, context);

    BTreeJobGenParams jobGenParams = new BTreeJobGenParams(chosenIndex.getIndexName(), IndexType.BTREE,
            dataset.getDataverseName(), dataset.getDatasetName(), retainInput, retainNull, requiresBroadcast);
    jobGenParams.setLowKeyInclusive(lowKeyInclusive[0]);
    jobGenParams.setHighKeyInclusive(highKeyInclusive[0]);
    jobGenParams.setIsEqCondition(isEqCondition);
    // Both calls above share keyVarList; presumably the low-key vars come first, followed by the
    // high-key vars, which is what the offsets below rely on - verify against createKeyVarsAndExprs.
    jobGenParams.setLowKeyVarList(keyVarList, 0, numLowKeys);
    jobGenParams.setHighKeyVarList(keyVarList, numLowKeys, numHighKeys);

    ILogicalOperator inputOp = null;
    if (!assignKeyVarList.isEmpty()) {
        // Assign operator that sets the constant secondary-index search-key fields if necessary.
        AssignOperator assignConstantSearchKeys = new AssignOperator(assignKeyVarList, assignKeyExprList);
        // Input to this assign is the EmptyTupleSource (which the dataSourceScan also must have had as input).
        assignConstantSearchKeys.getInputs().add(dataSourceOp.getInputs().get(0));
        assignConstantSearchKeys.setExecutionMode(dataSourceOp.getExecutionMode());
        inputOp = assignConstantSearchKeys;
    } else {
        // All index search keys are variables.
        inputOp = probeSubTree.root;
    }

    UnnestMapOperator secondaryIndexUnnestOp = AccessMethodUtils.createSecondaryIndexUnnestMap(dataset,
            recordType, chosenIndex, inputOp, jobGenParams, context, false, retainInput);

    // Generate the rest of the upstream plan which feeds the search results into the primary index.
    UnnestMapOperator primaryIndexUnnestOp = null;
    boolean isPrimaryIndex = chosenIndex.isPrimaryIndex();
    if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
        // External dataset
        ExternalDataLookupOperator externalDataAccessOp = AccessMethodUtils.createExternalDataLookupUnnestMap(
                dataSourceOp, dataset, recordType, secondaryIndexUnnestOp, context, chosenIndex, retainInput,
                retainNull);
        indexSubTree.dataSourceRef.setValue(externalDataAccessOp);
        return externalDataAccessOp;
    } else if (!isPrimaryIndex) {
        primaryIndexUnnestOp = AccessMethodUtils.createPrimaryIndexUnnestMap(dataSourceOp, dataset, recordType,
                secondaryIndexUnnestOp, context, true, retainInput, retainNull, false);

        // Replace the datasource scan with the new plan rooted at
        // primaryIndexUnnestMap.
        indexSubTree.dataSourceRef.setValue(primaryIndexUnnestOp);
    } else {
        // The chosen index is the primary index: build an unnest-map that reuses the search expression
        // and produces the variables of the original scan.
        List<Object> primaryIndexOutputTypes = new ArrayList<Object>();
        try {
            AccessMethodUtils.appendPrimaryIndexTypes(dataset, recordType, primaryIndexOutputTypes);
        } catch (IOException e) {
            throw new AlgebricksException(e);
        }
        List<LogicalVariable> scanVariables = dataSourceOp.getVariables();
        primaryIndexUnnestOp = new UnnestMapOperator(scanVariables, secondaryIndexUnnestOp.getExpressionRef(),
                primaryIndexOutputTypes, retainInput);
        primaryIndexUnnestOp.getInputs().add(new MutableObject<ILogicalOperator>(inputOp));

        if (!primaryIndexPostProccessingIsNeeded) {
            // The search covers the replaced predicates, so only the remaining ones stay in the condition.
            List<Mutable<ILogicalExpression>> remainingFuncExprs = new ArrayList<Mutable<ILogicalExpression>>();
            getNewConditionExprs(conditionRef, replacedFuncExprs, remainingFuncExprs);
            // Generate new condition.
            if (!remainingFuncExprs.isEmpty()) {
                ILogicalExpression pulledCond = createSelectCondition(remainingFuncExprs);
                conditionRef.setValue(pulledCond);
            } else {
                conditionRef.setValue(null);
            }
        }

        // Adds equivalence classes --- one equivalent class between a primary key
        // variable and a record field-access expression.
        EquivalenceClassUtils.addEquivalenceClassesForPrimaryIndexAccess(primaryIndexUnnestOp, scanVariables,
                recordType, dataset, context);
    }

    return primaryIndexUnnestOp;
}

From source file:org.apache.asterix.optimizer.rules.am.BTreeAccessMethod.java

/**
 * Builds the logical plan that searches the chosen BTree index with a range predicate derived from the
 * matched optimizable function expressions and, where required, feeds the search results into a
 * primary-index (or external-data) lookup.
 *
 * <p>The method proceeds in these steps:
 * <ol>
 * <li>Walk the function expressions recorded for {@code chosenIndex} and fold each one into per-key
 * low/high bounds (expression, limit type, inclusiveness).</li>
 * <li>Give up (return {@code null}) if some expression could not be integrated into the bounds.</li>
 * <li>Normalize mixed open/closed intervals and detect prefix-search cases; both mark that the original
 * condition must be kept for post-processing.</li>
 * <li>Create the key variables/expressions, the {@link BTreeJobGenParams}, and the index-search
 * unnest-map operator.</li>
 * <li>Create the final operator depending on whether the dataset is external, the index is a secondary
 * index, or the index is the primary index.</li>
 * </ol>
 *
 * @param conditionRef reference to the condition being optimized. In the primary-index branch, when no
 *            post-processing is needed, its value is replaced by a condition rebuilt from the expressions
 *            returned by {@code getNewConditionExprs}, or by {@code null} if none remain.
 * @param indexSubTree sub-tree that contains the data-source scan; supplies the dataset, record type and
 *            meta record type. For external datasets its data-source reference is replaced.
 * @param probeSubTree sub-tree passed to search-key creation and limit-type detection; its root becomes
 *            the input operator when no search-key assign operator is needed.
 * @param chosenIndex the index to search.
 * @param analysisCtx supplies the matched function expressions, the expression/variable pairs for the
 *            chosen index and the number of matched keys.
 * @param retainInput forwarded to the job-gen parameters and to the created unnest-map operators.
 * @param retainNull together with {@code retainInput}, selects a {@link LeftOuterUnnestMapOperator} in the
 *            primary-index branch; also forwarded to the helper methods that build the other operators.
 * @param requiresBroadcast forwarded to the job-gen parameters.
 * @param context optimization context; used to allocate new variables and passed to helper methods.
 * @return the operator that produces the records (primary-index unnest-map or external-data lookup), or
 *         {@code null} if the function expressions could not be turned into a range predicate.
 * @throws AlgebricksException if a function expression cannot be matched to any key field of the index.
 * @throws IllegalStateException if a function expression yields a limit type not handled by the switch.
 */
@Override
public ILogicalOperator createSecondaryToPrimaryPlan(Mutable<ILogicalExpression> conditionRef,
        OptimizableOperatorSubTree indexSubTree, OptimizableOperatorSubTree probeSubTree, Index chosenIndex,
        AccessMethodAnalysisContext analysisCtx, boolean retainInput, boolean retainNull,
        boolean requiresBroadcast, IOptimizationContext context) throws AlgebricksException {
    Dataset dataset = indexSubTree.getDataset();
    ARecordType recordType = indexSubTree.getRecordType();
    ARecordType metaRecordType = indexSubTree.getMetaRecordType();
    // we made sure indexSubTree has datasource scan
    AbstractDataSourceOperator dataSourceOp = (AbstractDataSourceOperator) indexSubTree.getDataSourceRef()
            .getValue();
    List<Pair<Integer, Integer>> exprAndVarList = analysisCtx.indexExprsAndVars.get(chosenIndex);
    List<IOptimizableFuncExpr> matchedFuncExprs = analysisCtx.matchedFuncExprs;
    int numSecondaryKeys = analysisCtx.indexNumMatchedKeys.get(chosenIndex);
    // List of function expressions that will be replaced by the secondary-index search.
    // These func exprs will be removed from the select condition at the very end of this method.
    Set<ILogicalExpression> replacedFuncExprs = new HashSet<>();

    // Info on high and low keys for the BTree search predicate.
    // All arrays below are indexed by the key's position in the chosen index (keyPos).
    ILogicalExpression[] lowKeyExprs = new ILogicalExpression[numSecondaryKeys];
    ILogicalExpression[] highKeyExprs = new ILogicalExpression[numSecondaryKeys];
    LimitType[] lowKeyLimits = new LimitType[numSecondaryKeys];
    LimitType[] highKeyLimits = new LimitType[numSecondaryKeys];
    boolean[] lowKeyInclusive = new boolean[numSecondaryKeys];
    boolean[] highKeyInclusive = new boolean[numSecondaryKeys];
    ILogicalExpression[] constantAtRuntimeExpressions = new ILogicalExpression[numSecondaryKeys];
    LogicalVariable[] constAtRuntimeExprVars = new LogicalVariable[numSecondaryKeys];

    /* TODO: For now we don't do any sophisticated analysis of the func exprs to come up with "the best" range
     * predicate. If we can't figure out how to integrate a certain funcExpr into the current predicate,
     * we just bail by setting this flag.*/
    boolean couldntFigureOut = false;
    boolean doneWithExprs = false;
    boolean isEqCondition = false;
    // Track which key positions have received a bound; only the EQUAL case below sets bits in these.
    BitSet setLowKeys = new BitSet(numSecondaryKeys);
    BitSet setHighKeys = new BitSet(numSecondaryKeys);
    // Go through the func exprs listed as optimizable by the chosen index,
    // and formulate a range predicate on the secondary-index keys.

    // Records whether a type cast happened from a real (FLOAT, DOUBLE) value to an INT value,
    // since there is a rounding issue when dealing with the LT(<) or GT(>) operators.
    boolean realTypeConvertedToIntegerType;

    for (Pair<Integer, Integer> exprIndex : exprAndVarList) {
        // Position of the field of matchedFuncExprs.get(exprIndex) in the chosen index's indexed exprs.
        IOptimizableFuncExpr optFuncExpr = matchedFuncExprs.get(exprIndex.first);
        int keyPos = indexOf(optFuncExpr.getFieldName(0), chosenIndex.getKeyFieldNames());
        if (keyPos < 0 && optFuncExpr.getNumLogicalVars() > 1) {
            // If we are optimizing a join, the matching field may be the second field name.
            keyPos = indexOf(optFuncExpr.getFieldName(1), chosenIndex.getKeyFieldNames());
        }
        if (keyPos < 0) {
            throw new AlgebricksException(
                    "Could not match optimizable function expression to any index field name.");
        }
        Pair<ILogicalExpression, Boolean> returnedSearchKeyExpr = AccessMethodUtils
                .createSearchKeyExpr(optFuncExpr, indexSubTree, probeSubTree);
        ILogicalExpression searchKeyExpr = returnedSearchKeyExpr.first;
        if (searchKeyExpr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
            // A function-call search key is remembered separately and replaced here by a reference to a
            // fresh variable; both arrays are later handed to createKeyVarsAndExprs.
            constantAtRuntimeExpressions[keyPos] = searchKeyExpr;
            constAtRuntimeExprVars[keyPos] = context.newVar();
            searchKeyExpr = new VariableReferenceExpression(constAtRuntimeExprVars[keyPos]);

        }
        realTypeConvertedToIntegerType = returnedSearchKeyExpr.second;

        LimitType limit = getLimitType(optFuncExpr, probeSubTree);

        // If a DOUBLE or FLOAT constant is converted to an INT type value,
        // we need to check a corner case where two real values are located between an INT value.
        // For example, for the following query,
        //
        // for $emp in dataset empDataset
        // where $emp.age > double("2.3") and $emp.age < double("3.3")
        // return $emp.id
        //
        // It should generate a result if there is a tuple that satisfies the condition, which is 3,
        // however, it does not generate the desired result since finding candidates
        // fails after truncating the fraction part (there is no INT whose value is greater than 2 and less than 3.)
        //
        // Therefore, we convert LT(<) to LE(<=) and GT(>) to GE(>=) to find candidates.
        // This does not change the result of an actual comparison since this conversion is only applied
        // for finding candidates from an index.
        //
        if (realTypeConvertedToIntegerType) {
            if (limit == LimitType.HIGH_EXCLUSIVE) {
                limit = LimitType.HIGH_INCLUSIVE;
            } else if (limit == LimitType.LOW_EXCLUSIVE) {
                limit = LimitType.LOW_INCLUSIVE;
            }
        }

        // Fold this expression's bound into the per-key low/high bounds.
        switch (limit) {
        case EQUAL: {
            if (lowKeyLimits[keyPos] == null && highKeyLimits[keyPos] == null) {
                // Equality sets both the low and the high bound of this key to the same expression.
                lowKeyLimits[keyPos] = highKeyLimits[keyPos] = limit;
                lowKeyInclusive[keyPos] = highKeyInclusive[keyPos] = true;
                lowKeyExprs[keyPos] = highKeyExprs[keyPos] = searchKeyExpr;
                setLowKeys.set(keyPos);
                setHighKeys.set(keyPos);
                isEqCondition = true;
            } else {
                // Has already been set to the identical values.
                // When optimizing join we may encounter the same optimizable expression twice
                // (once from analyzing each side of the join)
                if (lowKeyLimits[keyPos] == limit && lowKeyInclusive[keyPos] == true
                        && lowKeyExprs[keyPos].equals(searchKeyExpr) && highKeyLimits[keyPos] == limit
                        && highKeyInclusive[keyPos] == true && highKeyExprs[keyPos].equals(searchKeyExpr)) {
                    isEqCondition = true;
                    break;
                }
                // A different bound is already recorded for this key: bail out.
                couldntFigureOut = true;
            }
            // TODO: For now don't consider prefix searches.
            // If high and low keys are set, we exit for now.
            if (setLowKeys.cardinality() == numSecondaryKeys && setHighKeys.cardinality() == numSecondaryKeys) {
                doneWithExprs = true;
            }
            break;
        }
        case HIGH_EXCLUSIVE: {
            // An exclusive high bound is recorded when no high bound exists yet, and it also replaces a
            // previously recorded inclusive high bound on the same key.
            if (highKeyLimits[keyPos] == null || (highKeyLimits[keyPos] != null && highKeyInclusive[keyPos])) {
                highKeyLimits[keyPos] = limit;
                highKeyExprs[keyPos] = searchKeyExpr;
                highKeyInclusive[keyPos] = false;
            } else {
                // Has already been set to the identical values. When optimizing join we may encounter the
                // same optimizable expression twice
                // (once from analyzing each side of the join)
                if (highKeyLimits[keyPos] == limit && highKeyInclusive[keyPos] == false
                        && highKeyExprs[keyPos].equals(searchKeyExpr)) {
                    break;
                }
                couldntFigureOut = true;
                doneWithExprs = true;
            }
            break;
        }
        case HIGH_INCLUSIVE: {
            if (highKeyLimits[keyPos] == null) {
                highKeyLimits[keyPos] = limit;
                highKeyExprs[keyPos] = searchKeyExpr;
                highKeyInclusive[keyPos] = true;
            } else {
                // Has already been set to the identical values. When optimizing join we may encounter the
                // same optimizable expression twice
                // (once from analyzing each side of the join)
                if (highKeyLimits[keyPos] == limit && highKeyInclusive[keyPos] == true
                        && highKeyExprs[keyPos].equals(searchKeyExpr)) {
                    break;
                }
                couldntFigureOut = true;
                doneWithExprs = true;
            }
            break;
        }
        case LOW_EXCLUSIVE: {
            // Mirror image of HIGH_EXCLUSIVE: an exclusive low bound also replaces a previously recorded
            // inclusive low bound on the same key.
            if (lowKeyLimits[keyPos] == null || (lowKeyLimits[keyPos] != null && lowKeyInclusive[keyPos])) {
                lowKeyLimits[keyPos] = limit;
                lowKeyExprs[keyPos] = searchKeyExpr;
                lowKeyInclusive[keyPos] = false;
            } else {
                // Has already been set to the identical values. When optimizing join we may encounter the
                // same optimizable expression twice
                // (once from analyzing each side of the join)
                if (lowKeyLimits[keyPos] == limit && lowKeyInclusive[keyPos] == false
                        && lowKeyExprs[keyPos].equals(searchKeyExpr)) {
                    break;
                }
                couldntFigureOut = true;
                doneWithExprs = true;
            }
            break;
        }
        case LOW_INCLUSIVE: {
            if (lowKeyLimits[keyPos] == null) {
                lowKeyLimits[keyPos] = limit;
                lowKeyExprs[keyPos] = searchKeyExpr;
                lowKeyInclusive[keyPos] = true;
            } else {
                // Has already been set to the identical values. When optimizing join we may encounter the
                // same optimizable expression twice
                // (once from analyzing each side of the join)
                if (lowKeyLimits[keyPos] == limit && lowKeyInclusive[keyPos] == true
                        && lowKeyExprs[keyPos].equals(searchKeyExpr)) {
                    break;
                }
                couldntFigureOut = true;
                doneWithExprs = true;
            }
            break;
        }
        default: {
            throw new IllegalStateException();
        }
        }
        if (!couldntFigureOut) {
            // Remember to remove this funcExpr later.
            replacedFuncExprs.add(matchedFuncExprs.get(exprIndex.first).getFuncExpr());
        }
        if (doneWithExprs) {
            break;
        }
    }
    // No usable range predicate could be formed; the caller receives null.
    if (couldntFigureOut) {
        return null;
    }

    // If the select condition contains mixed open/closed intervals on multiple keys, then we make all intervals
    // closed to obtain a superset of answers and leave the original selection in place.
    boolean primaryIndexPostProccessingIsNeeded = false;
    for (int i = 1; i < numSecondaryKeys; ++i) {
        if (lowKeyInclusive[i] != lowKeyInclusive[0]) {
            Arrays.fill(lowKeyInclusive, true);
            primaryIndexPostProccessingIsNeeded = true;
            break;
        }
    }
    for (int i = 1; i < numSecondaryKeys; ++i) {
        if (highKeyInclusive[i] != highKeyInclusive[0]) {
            Arrays.fill(highKeyInclusive, true);
            primaryIndexPostProccessingIsNeeded = true;
            break;
        }
    }

    // determine cases when prefix search could be applied
    // Every key whose low or high bound is present/absent differently from key 0 reduces the number of
    // keys used for the search by one and forces the original condition to be kept.
    // Note: the loop is bounded by the array length, which is the original number of matched keys.
    for (int i = 1; i < lowKeyExprs.length; i++) {
        if (lowKeyLimits[0] == null && lowKeyLimits[i] != null
                || lowKeyLimits[0] != null && lowKeyLimits[i] == null
                || highKeyLimits[0] == null && highKeyLimits[i] != null
                || highKeyLimits[0] != null && highKeyLimits[i] == null) {
            numSecondaryKeys--;
            primaryIndexPostProccessingIsNeeded = true;
        }
    }
    // With no bound on the first key, that side of the range is flagged inclusive.
    if (lowKeyLimits[0] == null) {
        lowKeyInclusive[0] = true;
    }
    if (highKeyLimits[0] == null) {
        highKeyInclusive[0] = true;
    }

    // Here we generate vars and funcs for assigning the secondary-index keys to be fed into the secondary-index
    // search.
    // List of variables for the assign.
    ArrayList<LogicalVariable> keyVarList = new ArrayList<LogicalVariable>();
    // List of variables and expressions for the assign.
    ArrayList<LogicalVariable> assignKeyVarList = new ArrayList<LogicalVariable>();
    ArrayList<Mutable<ILogicalExpression>> assignKeyExprList = new ArrayList<Mutable<ILogicalExpression>>();
    // Low keys are processed first, then high keys; both calls share the same output lists.
    int numLowKeys = createKeyVarsAndExprs(numSecondaryKeys, lowKeyLimits, lowKeyExprs, assignKeyVarList,
            assignKeyExprList, keyVarList, context, constantAtRuntimeExpressions, constAtRuntimeExprVars);
    int numHighKeys = createKeyVarsAndExprs(numSecondaryKeys, highKeyLimits, highKeyExprs, assignKeyVarList,
            assignKeyExprList, keyVarList, context, constantAtRuntimeExpressions, constAtRuntimeExprVars);

    BTreeJobGenParams jobGenParams = new BTreeJobGenParams(chosenIndex.getIndexName(), IndexType.BTREE,
            dataset.getDataverseName(), dataset.getDatasetName(), retainInput, requiresBroadcast);
    // Only the inclusiveness of key 0 is passed on; the normalization above made the per-key flags agree
    // whenever they differed.
    jobGenParams.setLowKeyInclusive(lowKeyInclusive[0]);
    jobGenParams.setHighKeyInclusive(highKeyInclusive[0]);
    jobGenParams.setIsEqCondition(isEqCondition);
    // The low-key list starts at offset 0 of keyVarList and the high-key list at offset numLowKeys.
    jobGenParams.setLowKeyVarList(keyVarList, 0, numLowKeys);
    jobGenParams.setHighKeyVarList(keyVarList, numLowKeys, numHighKeys);

    ILogicalOperator inputOp = null;
    if (!assignKeyVarList.isEmpty()) {
        // Assign operator that sets the constant secondary-index search-key fields if necessary.
        AssignOperator assignConstantSearchKeys = new AssignOperator(assignKeyVarList, assignKeyExprList);
        // Input to this assign is the EmptyTupleSource (which the dataSourceScan also must have had as input).
        assignConstantSearchKeys.getInputs().add(new MutableObject<ILogicalOperator>(
                OperatorManipulationUtil.deepCopy(dataSourceOp.getInputs().get(0).getValue())));
        assignConstantSearchKeys.setExecutionMode(dataSourceOp.getExecutionMode());
        inputOp = assignConstantSearchKeys;
    } else {
        // All index search keys are variables.
        inputOp = probeSubTree.getRoot();
    }

    // This unnest-map is created for every index kind; in the primary-index branch below only its
    // expression reference may be reused.
    ILogicalOperator secondaryIndexUnnestOp = AccessMethodUtils.createSecondaryIndexUnnestMap(dataset,
            recordType, metaRecordType, chosenIndex, inputOp, jobGenParams, context, false, retainInput,
            retainNull);

    // Generate the rest of the upstream plan which feeds the search results into the primary index.
    AbstractUnnestMapOperator primaryIndexUnnestOp = null;

    boolean isPrimaryIndex = chosenIndex.isPrimaryIndex();
    if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
        // External dataset
        // The lookup operator replaces the data-source scan in the index sub-tree and is returned directly.
        UnnestMapOperator externalDataAccessOp = AccessMethodUtils.createExternalDataLookupUnnestMap(
                dataSourceOp, dataset, recordType, secondaryIndexUnnestOp, context, chosenIndex, retainInput,
                retainNull);
        indexSubTree.getDataSourceRef().setValue(externalDataAccessOp);
        return externalDataAccessOp;
    } else if (!isPrimaryIndex) {
        // Secondary index: the secondary search result is fed into a primary-index lookup.
        primaryIndexUnnestOp = AccessMethodUtils.createPrimaryIndexUnnestMap(dataSourceOp, dataset, recordType,
                metaRecordType, secondaryIndexUnnestOp, context, true, retainInput, retainNull, false);

        // Adds equivalence classes --- one equivalent class between a primary key
        // variable and a record field-access expression.
        EquivalenceClassUtils.addEquivalenceClassesForPrimaryIndexAccess(primaryIndexUnnestOp,
                dataSourceOp.getVariables(), recordType, metaRecordType, dataset, context);
    } else {
        // Primary index: the search itself produces the records, using the scan's output variables.
        List<Object> primaryIndexOutputTypes = new ArrayList<Object>();
        AccessMethodUtils.appendPrimaryIndexTypes(dataset, recordType, metaRecordType, primaryIndexOutputTypes);
        List<LogicalVariable> scanVariables = dataSourceOp.getVariables();

        // Checks whether the primary index search can replace the given
        // SELECT condition.
        // If so, condition will be set to null and eventually the SELECT
        // operator will be removed.
        // If not, we create a new condition based on remaining ones.
        if (!primaryIndexPostProccessingIsNeeded) {
            List<Mutable<ILogicalExpression>> remainingFuncExprs = new ArrayList<Mutable<ILogicalExpression>>();
            getNewConditionExprs(conditionRef, replacedFuncExprs, remainingFuncExprs);
            // Generate new condition.
            if (!remainingFuncExprs.isEmpty()) {
                ILogicalExpression pulledCond = createSelectCondition(remainingFuncExprs);
                conditionRef.setValue(pulledCond);
            } else {
                conditionRef.setValue(null);
            }
        }

        // Checks whether LEFT_OUTER_UNNESTMAP operator is required.
        // It is required exactly when both retainNull and retainInput are set.
        boolean leftOuterUnnestMapRequired = false;
        if (retainNull && retainInput) {
            leftOuterUnnestMapRequired = true;
        } else {
            leftOuterUnnestMapRequired = false;
        }

        if (conditionRef.getValue() != null) {
            // A condition remains: build a fresh index-search function call from the job-gen parameters.
            // The job gen parameters are transferred to the actual job gen
            // via the UnnestMapOperator's function arguments.
            List<Mutable<ILogicalExpression>> primaryIndexFuncArgs = new ArrayList<Mutable<ILogicalExpression>>();
            jobGenParams.writeToFuncArgs(primaryIndexFuncArgs);
            // An index search is expressed as an unnest-map over an
            // index-search function.
            IFunctionInfo primaryIndexSearch = FunctionUtil
                    .getFunctionInfo(AsterixBuiltinFunctions.INDEX_SEARCH);
            UnnestingFunctionCallExpression primaryIndexSearchFunc = new UnnestingFunctionCallExpression(
                    primaryIndexSearch, primaryIndexFuncArgs);
            primaryIndexSearchFunc.setReturnsUniqueValues(true);
            if (!leftOuterUnnestMapRequired) {
                primaryIndexUnnestOp = new UnnestMapOperator(scanVariables,
                        new MutableObject<ILogicalExpression>(primaryIndexSearchFunc), primaryIndexOutputTypes,
                        retainInput);
            } else {
                primaryIndexUnnestOp = new LeftOuterUnnestMapOperator(scanVariables,
                        new MutableObject<ILogicalExpression>(primaryIndexSearchFunc), primaryIndexOutputTypes,
                        true);
            }
        } else {
            // No condition remains: reuse the expression reference of the unnest-map created above.
            // The casts mirror the operator kind selected by leftOuterUnnestMapRequired.
            if (!leftOuterUnnestMapRequired) {
                primaryIndexUnnestOp = new UnnestMapOperator(scanVariables,
                        ((UnnestMapOperator) secondaryIndexUnnestOp).getExpressionRef(),
                        primaryIndexOutputTypes, retainInput);
            } else {
                primaryIndexUnnestOp = new LeftOuterUnnestMapOperator(scanVariables,
                        ((LeftOuterUnnestMapOperator) secondaryIndexUnnestOp).getExpressionRef(),
                        primaryIndexOutputTypes, true);
            }
        }

        primaryIndexUnnestOp.getInputs().add(new MutableObject<ILogicalOperator>(inputOp));

        // Adds equivalence classes --- one equivalent class between a primary key
        // variable and a record field-access expression.
        EquivalenceClassUtils.addEquivalenceClassesForPrimaryIndexAccess(primaryIndexUnnestOp, scanVariables,
                recordType, metaRecordType, dataset, context);
    }

    return primaryIndexUnnestOp;
}

From source file:gov.noaa.pfel.erddap.dataset.EDDTableFromNcFiles.java

/** NOT FOR GENERAL USE. Bob uses this to consolidate the individual GTSPP
 * data files into 30 x 30 x 1 month files (tiles).
 * 30 x 30 leads to 12x6=72 files for a given time point, so a request
 * for a short time but entire world opens ~72 files.
 * There are ~240 months worth of data, so a request for a small lon lat 
 * range for all time opens ~240 files.
 *
 * <p>Why tile? Because there are ~10^6 profiles/year now, so ~10^7 total.
 * And if 100 bytes of info per file for EDDTableFromFiles fileTable, that's 1 GB!.
 * So there needs to be fewer files.
 * We want to balance number of files for 1 time point (all region tiles), 
 * and number of time point files (I'll stick with their use of 1 month).
 * The tiling size selected is ok, but searches for single profile (by name)
 * are slow since a given file may have a wide range of station_ids.
 *
 * <p>Quality flags
 * <br>https://www.nodc.noaa.gov/GTSPP/document/qcmans/GTSPP_RT_QC_Manual_20090916.pdf
 * <br>http://www.ifremer.fr/gosud/formats/gtspp_qcflags.htm
 * <br>CODE  SIGNIFICATION
 * <br>0     NOT CONTROLLED VALUE
 * <br>1     CORRECT VALUE
 * <br>2     VALUE INCONSISTENT WITH STATISTICS
 * <br>3     DOUBTFUL VALUE (spike, ...)
 * <br>4     FALSE VALUE (out of scale, constant profile, vertical instability, ...)
 * <br>5     VALUE MODIFIED DURING QC (only for interpolate location or date)
 * <br>6-8   Not USED
 * <br>9     NO VALUE
 * <br>
 * <br>I interpret as: okay values are 1, 2, 5
 *
 * @param firstYear  e.g., 1990
 * @param firstMonth e.g., 1  (1..)
 * @param lastYear  e.g., 2010
 * @param lastMonth e.g., 12  (1..)     
 * @param testMode if true, this just processes .nc files 
 *    already in testTempDir f:/data/gtspp/testTemp/
 *    and puts results in testDestDir f:/data/gtspp/testDest/.
 *    So the first/last/Year/Month params are ignored.
 */
public static void bobConsolidateGtsppTgz(int firstYear, int firstMonth, int lastYear, int lastMonth,
        boolean testMode) throws Throwable {

    int chunkSize = 45; //lon width, lat height of a tile, in degrees
    int minLat = -90;
    int maxLat = 90;
    int minLon = -180;
    int maxLon = 180;
    String today = Calendar2.getCurrentISODateTimeStringZulu().substring(0, 10); //to nearest day
    String sevenZip = "c:\\progra~1\\7-Zip\\7z";
    String zipDir = "c:\\data\\gtspp\\bestNcZip\\"; //gtspp_at199001.tgz
    String destDir = "c:\\data\\gtspp\\bestNcConsolidated\\";
    String tempDir = "c:\\data\\gtspp\\temp\\";
    String testTempDir = "c:\\data\\gtspp\\testTemp\\"; //tempDir if testMode=true 
    String testDestDir = "c:\\data\\gtspp\\testDest\\"; //destDir if testMode=true
    String logFile = "c:\\data\\gtspp\\log" + String2.replaceAll(today, "-", "") + ".txt";
    File2.makeDirectory(tempDir);
    //https://www.nodc.noaa.gov/GTSPP/document/qcmans/qcflags.htm
    //1=correct, 2=probably correct, 5=modified (so now correct)
    //pre 2012-04-15 was {1,2,5}
    //pre 2012-05-25 was {1,2}
    int okQF[] = { 1, 2, 5 };
    String okQFCsv = String2.toCSSVString(okQF);
    float depthMV = 99999; //was -99;
    float temperatureMV = 99999; //was -99;
    float salinityMV = 99999; //was -99;
    int qMV = 9;
    String timeUnits = "days since 1900-01-01 00:00:00"; //causes roundoff error(!)
    double timeBaseAndFactor[] = Calendar2.getTimeBaseAndFactor(timeUnits);
    //impossible values:
    float minDepth = -0.4f, maxDepth = 10000; //-0.4 allows for imprecise values
    float minTemperature = -4, maxTemperature = 40;
    float minSalinity = 0, maxSalinity = 41;

    if (testMode) {
        firstYear = 1990;
        firstMonth = 1;
        lastYear = 1990;
        lastMonth = 1;
    }

    SSR.verbose = false;

    String2.setupLog(true, false, logFile, false, 1000000000);
    String2.log("*** starting bobConsolidateGtsppTgz " + Calendar2.getCurrentISODateTimeStringLocalTZ() + "\n"
            + "logFile=" + String2.logFileName() + "\n" + String2.standardHelpAboutMessage());
    long elapsedTime = System.currentTimeMillis();
    //q_pos (position quality flag), q_date_time (time quality flag)
    int stationCol = -1, organizationCol = -1, dataTypeCol = -1, platformCol = -1, cruiseCol = -1,
            longitudeCol = -1, latitudeCol = -1, timeCol = -1, depthCol = -1, temperatureCol = -1,
            salinityCol = -1;
    int totalNGoodStation = 0, totalNGoodPos = 0, totalNGoodTime = 0, totalNGoodDepth = 0,
            totalNGoodTemperature = 0, totalNGoodSalinity = 0;
    int totalNBadStation = 0, totalNBadPos = 0, totalNBadTime = 0, totalNBadDepth = 0, totalNBadTemperature = 0,
            totalNBadSalinity = 0, totalNWarnings = 0, totalNExceptions = 0;
    long totalNGoodRows = 0, totalNBadRows = 0;
    StringArray impossibleNanLat = new StringArray();
    StringArray impossibleMinLat = new StringArray();
    StringArray impossibleMaxLat = new StringArray();
    StringArray impossibleNanLon = new StringArray();
    StringArray impossibleMinLon = new StringArray();
    StringArray impossibleMaxLon = new StringArray();
    //StringArray impossibleNaNDepth = new StringArray();
    StringArray impossibleMinDepth = new StringArray();
    StringArray impossibleMaxDepth = new StringArray();
    //StringArray impossibleNanTemperature = new StringArray();
    StringArray impossibleMinTemperature = new StringArray();
    StringArray impossibleMaxTemperature = new StringArray();
    //StringArray impossibleNanSalinity = new StringArray();
    StringArray impossibleMinSalinity = new StringArray();
    StringArray impossibleMaxSalinity = new StringArray();
    int nLons = 0, nLats = 0, nFiles = 0;
    int lonSum = 0, latSum = 0;
    long profilesSum = 0;
    long rowsSum = 0;

    //*** process a month's data
    int year = firstYear;
    int month = firstMonth;
    long chunkTime = System.currentTimeMillis();
    while (year <= lastYear) {
        String2.log("\n*** " + Calendar2.getCurrentISODateTimeStringLocalTZ() + " start processing year=" + year
                + " month=" + month);

        String zMonth = String2.zeroPad("" + month, 2);
        String zMonth1 = String2.zeroPad("" + (month + 1), 2);
        double minEpochSeconds = Calendar2.isoStringToEpochSeconds(year + "-" + zMonth + "-01");
        double maxEpochSeconds = Calendar2.isoStringToEpochSeconds(year + "-" + zMonth1 + "-01");

        //destination directory
        String tDestDir = testMode ? testDestDir : destDir + year + "\\" + zMonth + "\\";
        File2.makeDirectory(tDestDir);
        HashMap tableHashMap = new HashMap();
        //make sure all files are deleted 
        int waitSeconds = 2;
        int nAttempts = 10;
        long cmdTime = System.currentTimeMillis();
        String cmd = "del/q " + tDestDir + "*.*";
        for (int attempt = 0; attempt < nAttempts; attempt++) {
            if (attempt % 8 == 0) {
                String2.log(cmd);
                SSR.dosShell(cmd, 30 * 60); //10 minutes*60 seconds
                //File2.deleteAllFiles(tempDir);  //previous method
            }
            Math2.gc(waitSeconds * 1000); //gtspp: give OS time to settle
            File destDirFile = new File(tDestDir);
            File files[] = destDirFile.listFiles();
            String2.log("  nRemainingFiles=" + files.length);
            if (files.length == 0)
                break;
            waitSeconds = 2 * nAttempts;
        }
        String2.log("  cmd total time=" + Calendar2.elapsedTimeString(System.currentTimeMillis() - cmdTime));

        //unzip all atlantic, indian, and pacific .zip files for that month 
        String region2[] = { "at", "in", "pa" };
        int nRegions = testMode ? 1 : 3;
        for (int region = 0; region < nRegions; region++) {
            String sourceBaseName = "gtspp4_" + region2[region] + year + zMonth;
            String sourceZipJustFileName = sourceBaseName + ".tgz";
            String sourceZipName = zipDir + sourceZipJustFileName;

            if (!testMode) {

                //delete all files in tempDir
                waitSeconds = 2;
                nAttempts = 10;
                cmdTime = System.currentTimeMillis();
                cmd = "del/q " + tempDir + "*.*";
                String2.log(""); //blank line
                for (int attempt = 0; attempt < nAttempts; attempt++) {
                    String2.log(cmd);
                    SSR.dosShell(cmd, 30 * 60); //30 minutes*60 seconds
                    //File2.deleteAllFiles(tempDir);  //previous method

                    //delete dirs too
                    File2.deleteAllFiles(tempDir, true, true);

                    Math2.gc(waitSeconds * 1000); //gtspp: give OS time to settle
                    String2.log("  " + Math2.memoryString());
                    File tempDirFile = new File(tempDir);
                    File files[] = tempDirFile.listFiles();
                    String2.log("  nRemainingFiles=" + files.length);
                    if (files.length == 0)
                        break;
                    waitSeconds = 2 * nAttempts;
                }
                String2.log("  cmd total time="
                        + Calendar2.elapsedTimeString(System.currentTimeMillis() - cmdTime));

                //unzip file into tempDir         //gtspp_at199001.zip
                cmd = sevenZip + " -y e " + sourceZipName + " -o" + tempDir + " -r";
                cmdTime = System.currentTimeMillis();
                String2.log("\n*** " + cmd);
                if (File2.isFile(sourceZipName)) {
                    try {
                        SSR.dosShell(cmd, 30 * 60); //10 minutes*60 seconds
                        String2.log("  cmd time="
                                + Calendar2.elapsedTimeString(System.currentTimeMillis() - cmdTime));

                        //extract from the .tar file   //gtspp4_at199001.tar
                        cmd = sevenZip + " -y e " + tempDir + sourceBaseName + ".tar -o" + tempDir + " -r";
                        cmdTime = System.currentTimeMillis();
                        String2.log("\n*** " + cmd);
                        SSR.dosShell(cmd, 120 * 60); //120 minutes*60 seconds
                        String2.log("  cmd time="
                                + Calendar2.elapsedTimeString(System.currentTimeMillis() - cmdTime));
                    } catch (Exception e) {
                        String2.log("Caught exception: " + MustBe.throwableToString(e));
                    }
                }

                //previous method
                //SSR.unzip(sourceZipName,
                //    tempDir, true, 100 * 60, null); //ignoreZipDirectories, timeOutSeconds 100 minutes
            }

            //read each file and put data in proper table
            String tTempDir = testMode ? testTempDir : tempDir;
            File tTempDirAsFile = new File(tTempDir);
            String sourceFileNames[] = tTempDirAsFile.list(); //just the file names
            String2.log("\nunzipped " + sourceFileNames.length + " files");
            int nSourceFileNames = //testMode? 100 : 
                    sourceFileNames.length;
            int nGoodStation = 0, nGoodPos = 0, nGoodTime = 0, nGoodDepth = 0, nGoodTemperature = 0,
                    nGoodSalinity = 0, nGoodRows = 0;
            int nBadStation = 0, nBadPos = 0, nBadTime = 0, nBadDepth = 0, nBadTemperature = 0,
                    nBadSalinity = 0, nBadRows = 0, nWarnings = 0, nExceptions = 0;
            long fileReadTime = System.currentTimeMillis();
            profilesSum += nSourceFileNames;
            for (int sfi = 0; sfi < nSourceFileNames; sfi++) {
                String sourceFileName = sourceFileNames[sfi];
                if (sfi % 10000 == 0) {
                    //if (sfi > 0)    //2012-12-13 commented out. Let Java handle it.
                    //    Math2.gc(3 * 1000); //gtspp: give OS time to settle
                    //high water mark is ~160 MB, so memory not a problem
                    String2.log("file #" + sfi + " " + Math2.memoryString());
                }

                if (!sourceFileName.endsWith(".nc")) {
                    //String2.log("ERROR: not a .nc file: " + sourceFileName);
                    continue;
                }

                NetcdfFile ncFile = null;

                try {
                    //get the station name
                    //gtspp_13635162_te_111.nc  gtspp_10313692_cu_111.nc
                    if (!sourceFileName.matches("gtspp_[0-9]+_.*\\.nc")) { //was "\\d+")) {//all digits
                        nBadStation++;
                        throw new SimpleException("Invalid sourceFileName=" + sourceFileName);
                    }
                    int po = sourceFileName.indexOf('_', 6);
                    if (po < 0) {
                        nBadStation++;
                        throw new SimpleException("Invalid sourceFileName=" + sourceFileName);
                    }
                    int station = String2.parseInt(sourceFileName.substring(6, po));
                    nGoodStation++;
                    String key = sourceZipJustFileName + " " + sourceFileName;

                    //open the file
                    ncFile = NcHelper.openFile(tTempDir + sourceFileName);
                    Variable var;
                    Attributes tVarAtts = new Attributes();
                    String tUnits;

                    //get all of the data 

                    //stream_ident
                    var = ncFile.findVariable("stream_ident");
                    String organization = "";
                    String dataType = "";
                    if (var == null) {
                        nWarnings++;
                        String2.log("WARNING: No stream_ident in " + sourceFileName);
                    } else {
                        PrimitiveArray streamPA = NcHelper.getPrimitiveArray(var);
                        if (streamPA instanceof StringArray && streamPA.size() > 0) {
                            String stream = streamPA.getString(0);
                            if (stream.length() >= 4) {
                                organization = stream.substring(0, 2).trim();
                                dataType = stream.substring(2, 4).trim();
                            } else {
                                String2.log("WARNING: stream_ident isn't a 4 char string: " + stream);
                            }
                        } else {
                            String2.log("WARNING: stream_ident isn't a StringArray: " + streamPA.toString());
                        }
                    }

                    //platform_code
                    var = ncFile.findVariable("gtspp_platform_code");
                    String platform = "";
                    if (var == null) {
                        //a small percentage have this problem
                        //nWarnings++;
                        //String2.log("WARNING: No gtspp_platform_code in " + sourceFileName);
                    } else {
                        PrimitiveArray pa = NcHelper.getPrimitiveArray(var);
                        if (pa instanceof StringArray && pa.size() > 0) {
                            platform = pa.getString(0).trim();
                            //String2.log("platform_code=" + platform_code);
                        } else {
                            String2.log("WARNING: gtspp_platform_code isn't a StringArray: " + pa.toString());
                        }
                    }

                    //cruise
                    var = ncFile.findVariable("cruise_id");
                    String cruise = "";
                    if (var == null) {
                        nWarnings++;
                        String2.log("WARNING: No cruise_id in " + sourceFileName);
                    } else {
                        PrimitiveArray cruisePA = NcHelper.getPrimitiveArray(var);
                        if (cruisePA instanceof StringArray && cruisePA.size() > 0) {
                            cruise = cruisePA.getString(0).trim();
                        } else {
                            String2.log("WARNING: cruise_id isn't a StringArray: " + cruisePA.toString());
                        }
                    }

                    //prof_type  is TEMP or PSAL so don't save it.
                    /*var = ncFile.findVariable("prof_type");                        
                    String prof_type = "";
                    if (var == null) {
                    nWarnings++;
                    String2.log("WARNING: No prof_type in " + sourceFileName);
                    } else {
                    PrimitiveArray pa = NcHelper.getPrimitiveArray(var);
                    if (pa instanceof StringArray && pa.size() > 0) {
                        prof_type = pa.getString(0).trim();
                        String2.log("prof_type=" + prof_type);
                    } else {
                        String2.log("WARNING: prof_type isn't a StringArray: " + 
                            pa.toString());
                    }
                    }*/

                    //position quality flag 
                    var = ncFile.findVariable("position_quality_flag"); //was "q_pos");                        
                    if (var == null) {
                        nWarnings++;
                        String2.log("WARNING: No position_quality_flag in " + sourceFileName);
                    } else {
                        PrimitiveArray q_pos = NcHelper.getPrimitiveArray(var);
                        if (!(q_pos instanceof IntArray) || q_pos.size() != 1)
                            throw new SimpleException("Invalid position_quality_flag=" + q_pos);
                        int ti = q_pos.getInt(0);
                        if (String2.indexOf(okQF, ti) < 0) {
                            nBadPos++;
                            continue;
                        }
                        //nGoodPos++; is below
                    }

                    //time quality flag 
                    var = ncFile.findVariable("time_quality_flag"); //q_date_time");                        
                    if (var == null) {
                        nWarnings++;
                        String2.log("WARNING: No time_quality_flag in " + sourceFileName);
                    } else {
                        PrimitiveArray q_date_time = NcHelper.getPrimitiveArray(var);
                        if (!(q_date_time instanceof IntArray) || q_date_time.size() != 1)
                            throw new SimpleException("Invalid time_quality_flag=" + q_date_time);
                        int ti = q_date_time.getInt(0);
                        if (String2.indexOf(okQF, ti) < 0) {
                            nBadTime++;
                            continue;
                        }
                        //nGoodTime is below
                    }

                    //time
                    var = ncFile.findVariable("time");
                    if (var == null)
                        throw new SimpleException("No time!");
                    tVarAtts.clear();
                    NcHelper.getVariableAttributes(var, tVarAtts);
                    tUnits = tVarAtts.getString("units");
                    if (!timeUnits.equals(tUnits))
                        throw new SimpleException("Invalid time units=" + tUnits);
                    PrimitiveArray time = NcHelper.getPrimitiveArray(var);
                    if (!(time instanceof DoubleArray) || time.size() != 1)
                        throw new SimpleException("Invalid time=" + time);
                    double tTime = Calendar2.unitsSinceToEpochSeconds(timeBaseAndFactor[0],
                            timeBaseAndFactor[1], time.getDouble(0));
                    if (tTime < minEpochSeconds || tTime > maxEpochSeconds)
                        throw new SimpleException(
                                "Invalid tTime=" + Calendar2.safeEpochSecondsToIsoStringTZ(tTime, ""));
                    //original times (that I looked at) are to nearest second
                    //so round to nearest second (fix .99999 problems)
                    tTime = Math.rint(tTime);
                    nGoodTime++;

                    //longitude  (position qFlag is good)
                    var = ncFile.findVariable("longitude");
                    if (var == null) {
                        impossibleNanLon.add(key + " lon=null");
                        continue;
                    }
                    PrimitiveArray longitude = NcHelper.getPrimitiveArray(var);
                    if (!(longitude instanceof FloatArray) || longitude.size() != 1) {
                        impossibleNanLon.add(key + " lon=wrongTypeOrSize");
                        continue;
                    }
                    float lon = longitude.getFloat(0);
                    if (Float.isNaN(lon)) {
                        impossibleNanLon.add(key + " lon=NaN");
                        continue;
                    } else if (lon < minLon) {
                        impossibleMinLon.add(key + " lon=" + lon);
                        //fall through
                    } else if (lon > maxLon) {
                        impossibleMaxLon.add(key + " lon=" + lon);
                        //fall through
                    }
                    lon = (float) Math2.anglePM180(lon);

                    //latitude (position qFlag is good)
                    var = ncFile.findVariable("latitude");
                    if (var == null) {
                        impossibleNanLat.add(key + " lat=null");
                        continue;
                    }
                    PrimitiveArray latitude = NcHelper.getPrimitiveArray(var);
                    if (!(latitude instanceof FloatArray) || latitude.size() != 1) {
                        impossibleNanLat.add(key + " lat=wrongTypeOrSize");
                        continue;
                    }
                    float lat = latitude.getFloat(0);
                    if (Float.isNaN(lat)) {
                        impossibleNanLat.add(key + " lat=NaN");
                        continue;
                    } else if (lat < minLat) {
                        impossibleMinLat.add(key + " lat=" + lat);
                        continue;
                    } else if (lat > maxLat) {
                        impossibleMaxLat.add(key + " lat=" + lat);
                        continue;
                    }
                    nGoodPos++;

                    //depth
                    var = ncFile.findVariable("z");
                    if (var == null)
                        throw new SimpleException("No z!");
                    PrimitiveArray depth = NcHelper.getPrimitiveArray(var);
                    if (!(depth instanceof FloatArray) || depth.size() == 0)
                        throw new SimpleException("Invalid z=" + depth);
                    int nDepth = depth.size();

                    //DEPH_qparm
                    var = ncFile.findVariable("z_variable_quality_flag"); //DEPH_qparm");                        
                    if (var == null)
                        throw new SimpleException("No z_variable_quality_flag!");
                    PrimitiveArray DEPH_qparm = NcHelper.getPrimitiveArray(var);
                    if (!(DEPH_qparm instanceof IntArray) || DEPH_qparm.size() != nDepth)
                        throw new SimpleException("Invalid z_variable_quality_flag=" + DEPH_qparm);
                    //nGoodDepth is below

                    //temperature
                    var = ncFile.findVariable("temperature");
                    PrimitiveArray temperature;
                    PrimitiveArray TEMP_qparm;
                    float temperatureFV = temperatureMV;
                    if (var == null) {
                        //nWarnings++;
                        //String2.log("WARNING: No temperature in " + sourceFileName); reasonably common
                        temperature = PrimitiveArray.factory(float.class, nDepth, "" + temperatureMV);
                        TEMP_qparm = PrimitiveArray.factory(int.class, nDepth, "" + qMV);
                    } else {
                        temperature = NcHelper.getPrimitiveArray(var);
                        if (!(temperature instanceof FloatArray) || temperature.size() != nDepth)
                            throw new SimpleException("Invalid temperature=" + temperature);

                        tVarAtts.clear();
                        NcHelper.getVariableAttributes(var, tVarAtts);
                        temperatureFV = tVarAtts.getFloat("_FillValue");
                        if (!Float.isNaN(temperatureFV) && temperatureFV != temperatureMV)
                            throw new SimpleException("Invalid temperature _FillValue=" + temperatureFV);

                        //TEMP_qparm
                        var = ncFile.findVariable("temperature_quality_flag"); //TEMP_qparm");                        
                        if (var == null) {
                            nWarnings++;
                            String2.log("WARNING: No temperature_quality_flag in " + sourceFileName);
                            TEMP_qparm = PrimitiveArray.factory(int.class, nDepth, "" + qMV);
                        } else {
                            TEMP_qparm = NcHelper.getPrimitiveArray(var);
                            if (!(TEMP_qparm instanceof IntArray) || TEMP_qparm.size() != nDepth)
                                throw new SimpleException("Invalid temperature_quality_flag=" + TEMP_qparm);
                        }
                    }

                    //salinity
                    var = ncFile.findVariable("salinity");
                    PrimitiveArray salinity;
                    PrimitiveArray PSAL_qparm;
                    float salinityFV = salinityMV;
                    if (var == null) {
                        //String2.log("WARNING: No salinity in " + sourceFileName);   //very common
                        salinity = PrimitiveArray.factory(float.class, nDepth, "" + salinityMV);
                        PSAL_qparm = PrimitiveArray.factory(int.class, nDepth, "" + qMV);
                    } else {
                        salinity = NcHelper.getPrimitiveArray(var);
                        if (!(salinity instanceof FloatArray) || salinity.size() != nDepth)
                            throw new SimpleException("Invalid salinity=" + salinity);

                        tVarAtts.clear();
                        NcHelper.getVariableAttributes(var, tVarAtts);
                        salinityFV = tVarAtts.getFloat("_FillValue");
                        if (!Float.isNaN(salinityFV) && salinityFV != salinityMV)
                            throw new SimpleException("Invalid salinity _FillValue=" + salinityFV);

                        //PSAL_qparm
                        var = ncFile.findVariable("salinity_quality_flag"); //PSAL_qparm");                        
                        if (var == null) {
                            nWarnings++;
                            String2.log("WARNING: No salinity_quality_flag in " + sourceFileName);
                            PSAL_qparm = PrimitiveArray.factory(int.class, nDepth, "" + qMV);
                        } else {
                            PSAL_qparm = NcHelper.getPrimitiveArray(var);
                            if (!(PSAL_qparm instanceof IntArray) || PSAL_qparm.size() != nDepth)
                                throw new SimpleException("Invalid salinity_quality_flag=" + PSAL_qparm);
                        }
                    }

                    //clean the data
                    //(good to do it here so memory usage is low -- table remains as small as possible)
                    //Change "impossible" data to NaN
                    //(from https://www.nodc.noaa.gov/GTSPP/document/qcmans/GTSPP_RT_QC_Manual_20090916.pdf
                    //pg 61 has Table 2.1: Global Impossible Parameter Values).
                    BitSet keep = new BitSet();
                    keep.set(0, nDepth); //all true 

                    //find worst impossible depth/temperature/salinity for this station
                    //boolean tImpossibleNanDepth       = false;
                    //boolean tImpossibleNanTemperature = false;
                    //boolean tImpossibleNanSalinity    = false;
                    float tImpossibleMinDepth = minDepth;
                    float tImpossibleMaxDepth = maxDepth;
                    float tImpossibleMinTemperature = minTemperature;
                    float tImpossibleMaxTemperature = maxTemperature;
                    float tImpossibleMinSalinity = minSalinity;
                    float tImpossibleMaxSalinity = maxSalinity;

                    for (int row = 0; row < nDepth; row++) {

                        //DEPH_qparm
                        int qs = DEPH_qparm.getInt(row);
                        float f = depth.getFloat(row);
                        if (String2.indexOf(okQF, qs) < 0) {
                            nBadDepth++;
                            keep.clear(row);
                            continue;
                        } else if (Float.isNaN(f) || f == depthMV) { //"impossible" depth
                            //tImpossibleNanDepth = true;
                            nBadDepth++;
                            keep.clear(row);
                            continue;
                        } else if (f < minDepth) {
                            tImpossibleMinDepth = Math.min(tImpossibleMinDepth, f);
                            nBadDepth++;
                            keep.clear(row);
                            continue;
                        } else if (f > maxDepth) {
                            tImpossibleMaxDepth = Math.max(tImpossibleMaxDepth, f);
                            nBadDepth++;
                            keep.clear(row);
                            continue;
                        }
                        nGoodDepth++;

                        boolean hasData = false;

                        //temperature
                        qs = TEMP_qparm.getInt(row);
                        f = temperature.getFloat(row);
                        if (String2.indexOf(okQF, qs) < 0) {
                            temperature.setString(row, ""); //so bad value is now NaN
                            nBadTemperature++;
                        } else if (Float.isNaN(f) || f == temperatureMV) {
                            temperature.setString(row, ""); //so missing value is now NaN
                            nBadTemperature++;
                        } else if (f < minTemperature) { //"impossible" water temperature
                            tImpossibleMinTemperature = Math.min(tImpossibleMinTemperature, f);
                            temperature.setString(row, ""); //so impossible value is now NaN
                            nBadTemperature++;
                        } else if (f > maxTemperature) { //"impossible" water temperature
                            tImpossibleMaxTemperature = Math.max(tImpossibleMaxTemperature, f);
                            temperature.setString(row, ""); //so impossible value is now NaN
                            nBadTemperature++;
                        } else {
                            nGoodTemperature++;
                            hasData = true;
                        }

                        //salinity
                        qs = PSAL_qparm.getInt(row);
                        f = salinity.getFloat(row);
                        if (String2.indexOf(okQF, qs) < 0) {
                            salinity.setString(row, ""); //so bad value is now NaN
                            nBadSalinity++;
                        } else if (Float.isNaN(f) || f == salinityMV) {
                            salinity.setString(row, ""); //so missing value is now NaN
                            nBadSalinity++;
                        } else if (f < minSalinity) { //"impossible" salinity
                            tImpossibleMinSalinity = Math.min(tImpossibleMinSalinity, f);
                            salinity.setString(row, ""); //so impossible value is now NaN
                            nBadSalinity++;
                        } else if (f > maxSalinity) { //"impossible" salinity
                            tImpossibleMaxSalinity = Math.max(tImpossibleMaxSalinity, f);
                            salinity.setString(row, ""); //so impossible value is now NaN
                            nBadSalinity++;
                        } else {
                            nGoodSalinity++;
                            hasData = true;
                        }

                        //no valid temperature or salinity data?
                        if (!hasData) {
                            keep.clear(row);
                        }
                    }

                    //ensure sizes still correct
                    Test.ensureEqual(depth.size(), nDepth, "depth.size changed!");
                    Test.ensureEqual(temperature.size(), nDepth, "temperature.size changed!");
                    Test.ensureEqual(salinity.size(), nDepth, "salinity.size changed!");

                    //actually remove the bad rows
                    int tnGood = keep.cardinality();
                    if (testMode && verbose)
                        String2.log(
                                sourceFileName + ": nGoodRows=" + tnGood + " nBadRows=" + (nDepth - tnGood));
                    nGoodRows += tnGood;
                    nBadRows += nDepth - tnGood;
                    depth.justKeep(keep);
                    temperature.justKeep(keep);
                    salinity.justKeep(keep);
                    nDepth = depth.size();

                    //impossible
                    //if (tImpossibleNanDepth)
                    //     impossibleNanDepth.add(key + " hasNaN=true");
                    //if (tImpossibleNanTemperature)
                    //     impossibleNanTemperature.add(key + " hasNaN=true");
                    //if (tImpossibleNanSalinity)
                    //     impossibleNanSalinity.add(key + " hasNaN=true");

                    if (tImpossibleMinDepth < minDepth)
                        impossibleMinDepth.add(key + " worst = " + tImpossibleMinDepth);
                    if (tImpossibleMaxDepth > maxDepth)
                        impossibleMaxDepth.add(key + " worst = " + tImpossibleMaxDepth);
                    if (tImpossibleMinTemperature < minTemperature)
                        impossibleMinTemperature.add(key + " worst = " + tImpossibleMinTemperature);
                    if (tImpossibleMaxTemperature > maxTemperature)
                        impossibleMaxTemperature.add(key + " worst = " + tImpossibleMaxTemperature);
                    if (tImpossibleMinSalinity < minSalinity)
                        impossibleMinSalinity.add(key + " worst = " + tImpossibleMinSalinity);
                    if (tImpossibleMaxSalinity > maxSalinity)
                        impossibleMaxSalinity.add(key + " worst = " + tImpossibleMaxSalinity);

                    //which table
                    if (tnGood == 0)
                        continue;
                    int loni = Math2
                            .roundToInt(Math.floor((Math.min(lon, maxLon - 0.1f) - minLon) / chunkSize));
                    int lati = Math2
                            .roundToInt(Math.floor((Math.min(lat, maxLat - 0.1f) - minLat) / chunkSize));
                    String outTableName = (minLon + loni * chunkSize) + "E_" + (minLat + lati * chunkSize)
                            + "N";
                    //String2.replaceAll(cruise + "_" + organization + dataType, ' ', '_'); //too many: 3000+/month in 2011
                    Table tTable = (Table) tableHashMap.get(outTableName);

                    if (tTable == null) {

                        Attributes ncGlobalAtts = new Attributes();
                        NcHelper.getGlobalAttributes(ncFile, ncGlobalAtts);
                        String tHistory = ncGlobalAtts.getString("history");
                        tHistory = tHistory != null && tHistory.length() > 0 ? tHistory + "\n" : "";

                        //make a table for this lon/lat chunk (keyed by outTableName)
                        tTable = new Table();
                        Attributes ga = tTable.globalAttributes();
                        String ack = "These data were acquired from the US NOAA National Oceanographic Data Center (NODC) on "
                                + today + " from https://www.nodc.noaa.gov/GTSPP/.";
                        ga.add("acknowledgment", ack);
                        ga.add("license",
                                "These data are openly available to the public.  "
                                        + "Please acknowledge the use of these data with:\n" + ack + "\n\n"
                                        + "[standard]");
                        ga.add("history", tHistory
                                + ".tgz files from ftp.nodc.noaa.gov /pub/gtspp/best_nc/ (https://www.nodc.noaa.gov/GTSPP/)\n"
                                + today
                                + " Most recent ingest, clean, and reformat at ERD (bob.simons at noaa.gov).");
                        ga.add("infoUrl", "https://www.nodc.noaa.gov/GTSPP/");
                        ga.add("institution", "NOAA NODC");
                        ga.add("title", "Global Temperature and Salinity Profile Programme (GTSPP) Data");

                        String attName = "gtspp_ConventionVersion";
                        String attValue = ncGlobalAtts.getString(attName);
                        if (attValue != null && attValue.length() > 0)
                            ga.add(attName, attValue);

                        attName = "gtspp_program";
                        attValue = ncGlobalAtts.getString(attName);
                        if (attValue != null && attValue.length() > 0)
                            ga.add(attName, attValue);

                        attName = "gtspp_programVersion";
                        attValue = ncGlobalAtts.getString(attName);
                        if (attValue != null && attValue.length() > 0)
                            ga.add(attName, attValue);

                        attName = "gtspp_handbook_version";
                        attValue = ncGlobalAtts.getString(attName);
                        if (attValue != null && attValue.length() > 0)
                            ga.add(attName, attValue);

                        organizationCol = tTable.addColumn(tTable.nColumns(), "org", new StringArray(),
                                new Attributes());
                        platformCol = tTable.addColumn(tTable.nColumns(), "platform", new StringArray(),
                                new Attributes());
                        dataTypeCol = tTable.addColumn(tTable.nColumns(), "type", new StringArray(),
                                new Attributes());
                        cruiseCol = tTable.addColumn(tTable.nColumns(), "cruise", new StringArray(),
                                new Attributes());
                        stationCol = tTable.addColumn(tTable.nColumns(), "station_id", new IntArray(),
                                new Attributes());
                        longitudeCol = tTable.addColumn(tTable.nColumns(), "longitude", new FloatArray(),
                                (new Attributes()).add("units", EDV.LON_UNITS));
                        latitudeCol = tTable.addColumn(tTable.nColumns(), "latitude", new FloatArray(),
                                (new Attributes()).add("units", EDV.LAT_UNITS));
                        timeCol = tTable.addColumn(tTable.nColumns(), "time", new DoubleArray(),
                                (new Attributes()).add("units", EDV.TIME_UNITS));
                        depthCol = tTable.addColumn(tTable.nColumns(), "depth", new FloatArray(),
                                (new Attributes()).add("units", "m"));
                        temperatureCol = tTable.addColumn(tTable.nColumns(), "temperature", new FloatArray(),
                                (new Attributes()).add("units", "degree_C"));
                        salinityCol = tTable.addColumn(tTable.nColumns(), "salinity", new FloatArray(),
                                (new Attributes()).add("units", "1e-3")); //PSU changed to 1e-3 with CF std names 25

                        tableHashMap.put(outTableName, tTable);
                    }

                    //put data in tTable
                    int oNRows = tTable.nRows();
                    ((StringArray) tTable.getColumn(organizationCol)).addN(nDepth, organization);
                    ((StringArray) tTable.getColumn(platformCol)).addN(nDepth, platform);
                    ((StringArray) tTable.getColumn(dataTypeCol)).addN(nDepth, dataType);
                    ((StringArray) tTable.getColumn(cruiseCol)).addN(nDepth, cruise);
                    ((IntArray) tTable.getColumn(stationCol)).addN(nDepth, station);
                    ((FloatArray) tTable.getColumn(longitudeCol)).addN(nDepth, lon);
                    ((FloatArray) tTable.getColumn(latitudeCol)).addN(nDepth, lat);
                    ((DoubleArray) tTable.getColumn(timeCol)).addN(nDepth, tTime);
                    ((FloatArray) tTable.getColumn(depthCol)).append(depth);
                    ((FloatArray) tTable.getColumn(temperatureCol)).append(temperature);
                    ((FloatArray) tTable.getColumn(salinityCol)).append(salinity);

                    //ensure the table is valid (same size for each column)
                    tTable.ensureValid();

                } catch (Throwable t) {
                    nExceptions++;
                    String2.log(
                            "ERROR while processing " + sourceFileName + "\n  " + MustBe.throwableToString(t));
                } finally {
                    //always close the ncFile
                    if (ncFile != null) {
                        try {
                            ncFile.close();
                        } catch (Throwable t) {
                            String2.log("ERROR: unable to close " + sourceFileName + "\n"
                                    + MustBe.getShortErrorMessage(t));
                        }
                    }
                }
            }

            String2.log("\n  time to read all those files = "
                    + Calendar2.elapsedTimeString(System.currentTimeMillis() - fileReadTime));

            //log this zip file's counts and add them to the running totals
            String2.log("\nIn zip=" + sourceZipName + "\n nExceptions=    " + nExceptions + "        nWarnings="
                    + nWarnings + "\n nBadStation=    " + nBadStation + "        nGoodStation=" + nGoodStation
                    + "\n nBadPos=        " + nBadPos + "        nGoodPos=" + nGoodPos + "\n nBadTime=       "
                    + nBadTime + "        nGoodTime=" + nGoodTime + "\n nBadDepth=      " + nBadDepth
                    + "        nGoodDepth=" + nGoodDepth + "\n nBadTemperature=" + nBadTemperature
                    + "        nGoodTemperature=" + nGoodTemperature + "\n nBadSalinity=   " + nBadSalinity
                    + "        nGoodSalinity=" + nGoodSalinity);
            totalNGoodStation += nGoodStation;
            totalNGoodPos += nGoodPos;
            totalNGoodTime += nGoodTime;
            totalNGoodDepth += nGoodDepth;
            totalNGoodTemperature += nGoodTemperature;
            totalNGoodSalinity += nGoodSalinity;
            totalNGoodRows += nGoodRows;
            totalNBadPos += nBadPos;
            totalNBadTime += nBadTime;
            totalNBadDepth += nBadDepth;
            totalNBadTemperature += nBadTemperature;
            totalNBadSalinity += nBadSalinity;
            totalNBadRows += nBadRows;
            totalNWarnings += nWarnings;
            totalNExceptions += nExceptions;
        } //end of region loop

        //save by outTableName
        boolean filePrinted = false;
        Object keys[] = tableHashMap.keySet().toArray();
        int nKeys = keys.length;
        String2.log("\n*** saving nFiles=" + nKeys);
        for (int keyi = 0; keyi < nKeys; keyi++) {
            String key = keys[keyi].toString();
            Table tTable = (Table) tableHashMap.remove(key);
            if (tTable == null || tTable.nRows() == 0) {
                String2.log("Unexpected: no table for key=" + key);
                continue;
            }

            //sort by time, station, depth  
            //depth matches the source files: from surface to deepest
            tTable.sort(new int[] { timeCol, stationCol, depthCol }, new boolean[] { true, true, true });

            //is this saving a small lat lon range?
            double stationStats[] = tTable.getColumn(stationCol).calculateStats();
            //double lonStats[]     = tTable.getColumn(longitudeCol).calculateStats();
            //double latStats[]     = tTable.getColumn(latitudeCol).calculateStats();
            //nLats++;
            //double latRange = latStats[PrimitiveArray.STATS_MAX] - latStats[PrimitiveArray.STATS_MIN];
            //latSum += latRange;
            rowsSum += tTable.nRows();
            String2.log("    stationRange=" + Math2.roundToInt(
                    stationStats[PrimitiveArray.STATS_MAX] - stationStats[PrimitiveArray.STATS_MIN]) +
            //"  lonRange="     + Math2.roundToInt(lonStats[    PrimitiveArray.STATS_MAX] - lonStats[    PrimitiveArray.STATS_MIN]) +
            //"  latRange="     + Math2.roundToInt(latRange) +
                    "  nRows=" + tTable.nRows());

            //save it
            String tName = tDestDir + String2.encodeFileNameSafe(key);
            /*if (lonStats[PrimitiveArray.STATS_MAX] > 45 &&
            lonStats[PrimitiveArray.STATS_MIN] < -45) {
                    
            //NO MORE: This happened with 1 file/cruise, 
            //  but won't happen now with lon/lat tiles.
            //crosses dateline (or widely across lon=0)?  split into 2 files
            Table ttTable = (Table)tTable.clone();
            ttTable.oneStepApplyConstraint(0, "longitude", "<", "0");
            ttTable.saveAsFlatNc(tName + "_W.nc", "row", false);
            double lonStatsW[] = ttTable.getColumn(longitudeCol).calculateStats();
            nLons++;
            double lonRangeW = lonStatsW[PrimitiveArray.STATS_MAX] - lonStatsW[PrimitiveArray.STATS_MIN];
            lonSum += lonRangeW;
                    
            ttTable = (Table)tTable.clone();
            ttTable.oneStepApplyConstraint(0, "longitude", ">=", "0");
            ttTable.saveAsFlatNc(tName + "_E.nc", "row", false);
            double lonStatsE[] = ttTable.getColumn(longitudeCol).calculateStats();
            nLons++;
            double lonRangeE = lonStatsE[PrimitiveArray.STATS_MAX] - lonStatsE[PrimitiveArray.STATS_MIN];
            lonSum += lonRangeE;
            String2.log("  westLonRange=" + Math2.roundToInt(lonRangeW) +
                        "  eastLonRange=" + Math2.roundToInt(lonRangeE));
            } else */
            {
                //nLons++;
                nFiles++;

                //create trajectory variable: platform + cruise
                StringArray pl = (StringArray) tTable.getColumn("platform");
                StringArray cr = (StringArray) tTable.getColumn("cruise");
                StringArray or = (StringArray) tTable.getColumn("org");
                StringArray ty = (StringArray) tTable.getColumn("type");
                StringArray tr = new StringArray();
                int n = pl.size();
                for (int i = 0; i < n; i++) {
                    pl.set(i, String2.whitespacesToSpace(pl.get(i)));
                    cr.set(i, String2.whitespacesToSpace(cr.get(i)));
                    or.set(i, String2.whitespacesToSpace(or.get(i)));
                    ty.set(i, String2.whitespacesToSpace(ty.get(i)));
                    tr.add(or.getString(i) + "_" + ty.getString(i) + "_" + pl.getString(i) + "_"
                            + cr.getString(i));
                }
                tTable.addColumn(0, "trajectory", tr, new Attributes());

                tTable.saveAsFlatNc(tName + ".nc", "row", false); //convertToFakeMissingValues  (keep mv's as NaNs)
            }

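            //print a file: in testMode, dump only the first file saved by this loop
            //NOTE(review): the file was saved as tName + ".nc" but tName (without ".nc") is passed below -- confirm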
            if (testMode && !filePrinted) {
                filePrinted = true;
                String2.log(NcHelper.dumpString(tName, true));
            }
        }
        String2.log("\ncumulative nProfiles=" + profilesSum + " nRows=" + rowsSum + " mean nRows/file="
                + (rowsSum / Math.max(1, nFiles)));
        //if (nLats > 0) 
        //    String2.log(  "cumulative nLats=" + nLats + " meanLatRange=" + (float)(latSum / nLats));
        //if (nLons > 0) {
        //    String2.log(  "cumulative nLons=" + nLons + " meanLonRange=" + (float)(lonSum / nLons));
        //    String2.log("mean nRows per saved file = " + (rowsSum / nLons));
        //}

        //print the lists of stations with impossible values at the end of each year (month 12) or at the end of the run
        if (month == 12 || (year == lastYear && month == lastMonth)) {

            String2.log("\n*** " + Calendar2.getCurrentISODateTimeStringLocalTZ()
                    + " bobConsolidateGtsppTgz finished the chunk ending " + year + "-" + month + "\n"
                    + "chunkTime=" + Calendar2.elapsedTimeString(System.currentTimeMillis() - chunkTime));
            chunkTime = System.currentTimeMillis();

            //print impossible statistics
            String2.log("\nCumulative number of stations with:\n" + "impossibleNanLon         = "
                    + impossibleNanLon.size() + "\n" + "impossibleMinLon         = " + impossibleMinLon.size()
                    + "\n" + "impossibleMaxLon         = " + impossibleMaxLon.size() + "\n"
                    + "impossibleNanLat         = " + impossibleNanLat.size() + "\n"
                    + "impossibleMinLat         = " + impossibleMinLat.size() + "\n"
                    + "impossibleMaxLat         = " + impossibleMaxLat.size() + "\n"
                    + "impossibleMinDepth       = " + impossibleMinDepth.size() + "\n"
                    + "impossibleMaxDepth       = " + impossibleMaxDepth.size() + "\n" +
                    //"impossibleLatLon      = " + impossibleLatLon.size() + "\n" +
                    "impossibleMinTemperature = " + impossibleMinTemperature.size() + "\n"
                    + "impossibleMaxTemperature = " + impossibleMaxTemperature.size() + "\n"
                    + "impossibleMinSalinity    = " + impossibleMinSalinity.size() + "\n"
                    + "impossibleMaxSalinity    = " + impossibleMaxSalinity.size() + "\n");

            //lon
            String2.log("\n*** " + impossibleNanLon.size() + " stations had invalid lon"
                    + " and good pos quality flags (" + okQFCsv + ").");
            impossibleNanLon.sortIgnoreCase();
            String2.log(impossibleNanLon.toNewlineString());

            String2.log("\n*** " + impossibleMinLon.size() + " stations had lon<" + minLon
                    + " and good pos quality flags (" + okQFCsv + ").");
            impossibleMinLon.sortIgnoreCase();
            String2.log(impossibleMinLon.toNewlineString());

            String2.log("\n*** " + impossibleMaxLon.size() + " stations had lon>" + maxLon
                    + " and good pos quality flags (" + okQFCsv + ").");
            impossibleMaxLon.sortIgnoreCase();
            String2.log(impossibleMaxLon.toNewlineString());

            //lat
            String2.log("\n*** " + impossibleNanLat.size() + " stations had invalid lat"
                    + " and good pos quality flags (" + okQFCsv + ").");
            impossibleNanLat.sortIgnoreCase();
            String2.log(impossibleNanLat.toNewlineString());

            String2.log("\n*** " + impossibleMinLat.size() + " stations had lat<" + minLat
                    + " and good pos quality flags (" + okQFCsv + ").");
            impossibleMinLat.sortIgnoreCase();
            String2.log(impossibleMinLat.toNewlineString());

            String2.log("\n*** " + impossibleMaxLat.size() + " stations had lat>" + maxLat
                    + " and good pos quality flags (" + okQFCsv + ").");
            impossibleMaxLat.sortIgnoreCase();
            String2.log(impossibleMaxLat.toNewlineString());

            //depth 
            String2.log("\n*** " + impossibleMinDepth.size() + " stations had depth<" + minDepth
                    + " and good depth quality flags (" + okQFCsv + ").");
            impossibleMinDepth.sortIgnoreCase();
            String2.log(impossibleMinDepth.toNewlineString());

            String2.log("\n*** " + impossibleMaxDepth.size() + " stations had depth>" + maxDepth
                    + " and good depth quality flags (" + okQFCsv + ").");
            impossibleMaxDepth.sortIgnoreCase();
            String2.log(impossibleMaxDepth.toNewlineString());

            //sa = impossibleLatLon.toArray();
            //Arrays.sort(sa);
            //String2.log("\n*** " + sa.length + " stations had impossible latitude or longitude values" +
            //    " and good q_pos quality flags.");
            //String2.log(String2.toNewlineString(sa));

            String2.log("\n*** " + impossibleMinTemperature.size() + " stations had temperature<"
                    + minTemperature + " and good temperature quality flags (" + okQFCsv + ").");
            impossibleMinTemperature.sortIgnoreCase();
            String2.log(impossibleMinTemperature.toNewlineString());

            String2.log("\n*** " + impossibleMaxTemperature.size() + " stations had temperature>"
                    + maxTemperature + " and good temperature quality flags (" + okQFCsv + ").");
            impossibleMaxTemperature.sortIgnoreCase();
            String2.log(impossibleMaxTemperature.toNewlineString());

            String2.log("\n*** " + impossibleMinSalinity.size() + " stations had salinity<" + minSalinity
                    + " and good salinity quality flags (" + okQFCsv + ").");
            impossibleMinSalinity.sortIgnoreCase();
            String2.log(impossibleMinSalinity.toNewlineString());

            String2.log("\n*** " + impossibleMaxSalinity.size() + " stations had salinity>" + maxSalinity
                    + " and good salinity quality flags (" + okQFCsv + ").");
            impossibleMaxSalinity.sortIgnoreCase();
            String2.log(impossibleMaxSalinity.toNewlineString());

        }

        //are we done?
        if (year == lastYear && month == lastMonth)
            break;

        //increment the month
        month++;
        if (month == 13) {
            year++;
            month = 1;
        }

    } //end of month/year loop

    String2.log("\n*** bobConsolidateGtspp completely finished " + firstYear + "-" + firstMonth + " through "
            + lastYear + "-" + lastMonth);

    String2.log("\n***" + "\ntotalNExceptions=    " + totalNExceptions + "        totalNWarnings=       "
            + totalNWarnings + "\ntotalNBadStation=    " + totalNBadStation + "        totalNGoodStation=    "
            + totalNGoodStation + "\ntotalNBadPos=        " + totalNBadPos + "        totalNGoodPos=        "
            + totalNGoodPos + "\ntotalNBadTime=       " + totalNBadTime + "        totalNGoodTime=       "
            + totalNGoodTime + "\ntotalNBadDepth=      " + totalNBadDepth + "        totalNGoodDepth=      "
            + totalNGoodDepth + "\ntotalNBadTemperature=" + totalNBadTemperature
            + "        totalNGoodTemperature=" + totalNGoodTemperature + "\ntotalNBadSalinity=   "
            + totalNBadSalinity + "        totalNGoodSalinity=   " + totalNGoodSalinity
            + "\ntotalNBadRows=       " + totalNBadRows + "        totalNGoodRows=       " + totalNGoodRows
            + "\nlogFile=F:/data/gtspp/log.txt" + "\n\n*** all finished time="
            + Calendar2.elapsedTimeString(System.currentTimeMillis() - elapsedTime));
    String2.returnLoggingToSystemOut();
}