Example usage for java.util Arrays deepToString

List of usage examples for java.util Arrays deepToString

Introduction

On this page you can find example usages of java.util.Arrays.deepToString.

Prototype

public static String deepToString(Object[] a) 

Source Link

Document

Returns a string representation of the "deep contents" of the specified array.

Usage

From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.dta.DTAFileReader.java

/**
 * Decodes the data section of the file and writes it out, one observation
 * per line, to a newly created tab-delimited temp file; the temp file is
 * registered on {@code ingesteddata} via {@code setTabDelimitedFile}.
 *
 * <p>For each of the {@code dataTable.getCaseQuantity()} observations a row
 * of {@code bytes_per_row} bytes is read from the stream and decoded column
 * by column, according to the type code that {@code variableTypeMap} holds
 * for the column's entry in {@code variableTypes}:
 * <ul>
 *   <li>{@code -5}: 1-byte signed value</li>
 *   <li>{@code -4}: 2-byte signed value (Stata "int")</li>
 *   <li>{@code -3}: 4-byte signed value (Stata "long")</li>
 *   <li>{@code -2}: 4-byte float</li>
 *   <li>{@code -1}: 8-byte double</li>
 *   <li>{@code 0}: string of the length given by {@code StringLengthTable}</li>
 * </ul>
 * Missing values are written as {@code MissingValueForTabDelimitedFile}.
 * Numeric values in columns whose format category is "time" or "date" are
 * passed through {@code decodeDateTimeData}, and the resulting format is
 * stored on the corresponding data variable.
 *
 * @param stream the input positioned at the start of the data section
 * @throws IllegalArgumentException if {@code stream} is {@code null}
 * @throws IOException if the stream ends before all the rows were read, if
 *         an unknown variable type is encountered (reported as an
 *         {@link InvalidObjectException}), or on any other I/O failure
 */
private void decodeData(BufferedInputStream stream) throws IOException {

    dbgLog.fine("\n***** decodeData(): start *****");

    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }

    int nvar = dataTable.getVarQuantity().intValue();
    int nobs = dataTable.getCaseQuantity().intValue();

    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("data dimensions[observations x variables] = (" + nobs + "x" + nvar + ")");
        dbgLog.fine("bytes per row=" + bytes_per_row + " bytes");
        dbgLog.fine("variableTypes=" + Arrays.deepToString(variableTypes));
        dbgLog.fine("StringLengthTable=" + StringLengthTable);
    }

    // create a File object to save the tab-delimited data file
    File tabDelimitedDataFile = File.createTempFile("tempTabfile.", ".tab");

    // save the temp tab-delimited file in the return ingest object:
    ingesteddata.setTabDelimitedFile(tabDelimitedDataFile);

    /* Should we lose this dateFormat thing in 4.0?
     * the UNF should be calculatable on the app side solely from the data
     * stored in the tab file and the type information stored the dataVariable
     * object.
     * furthermore, the very idea of storing a format entry not just for
     * every variable, but for every value/observation is a bit strange.
     * TODO: review and confirm that, in the 3.* implementation, every
     * entry in dateFormat[nvar][*] is indeed the same - except for the
     * missing value entries. -- L.A. 4.0
      (OK, I got rid of the dateFormat; instead I kinda sorta assume
      that the format is the same for every value in a column, save for
      the missing values... like this:
      dataTable.getDataVariables().get(columnCounter).setFormatSchemaName(ddt.format);
      BUT, this needs to be reviewed/confirmed etc!
     */

    FileOutputStream fileOutTab = new FileOutputStream(tabDelimitedDataFile);
    PrintWriter pwout = null;

    // The writer is closed in the finally block, so that the file handle
    // is released even when decoding fails half way through.
    try {
        pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true);

        for (int i = 0; i < nobs; i++) {
            byte[] dataRowBytes = new byte[bytes_per_row];
            Object[] dataRow = new Object[nvar];

            // A single read() call may return fewer bytes than requested,
            // and returns -1 (never 0) at the end of the stream; keep
            // reading until the row is complete or the stream runs out.
            int nbytes = 0;
            while (nbytes < bytes_per_row) {
                int justRead = stream.read(dataRowBytes, nbytes, bytes_per_row - nbytes);
                if (justRead <= 0) {
                    break;
                }
                nbytes += justRead;
            }

            if (nbytes == 0) {
                String errorMessage = "reading data: no data were read at(" + i + "th row)";
                throw new IOException(errorMessage);
            }
            if (nbytes < bytes_per_row) {
                throw new IOException("reading data: incomplete row at(" + i + "th row): expected "
                        + bytes_per_row + " bytes, but only " + nbytes + " were read");
            }

            // decoding each row
            int byte_offset = 0;
            for (int columnCounter = 0; columnCounter < variableTypes.length; columnCounter++) {

                Integer varType = variableTypeMap.get(variableTypes[columnCounter]);

                // 4.0 Check if this is a time/date variable:
                boolean isDateTimeDatum = false;
                String formatCategory = dataTable.getDataVariables().get(columnCounter).getFormatCategory();
                if (formatCategory != null && (formatCategory.equals("time") || formatCategory.equals("date"))) {
                    isDateTimeDatum = true;
                }

                String variableFormat = dateVariableFormats[columnCounter];

                // 256 is not a valid type code; it routes unmapped types to the default case
                switch (varType != null ? varType : 256) {
                case -5:
                    // Byte case
                    // note: 1 byte signed
                    byte byte_datum = dataRowBytes[byte_offset];

                    if (dbgLog.isLoggable(Level.FINER)) {
                        dbgLog.finer(i + "-th row " + columnCounter + "=th column byte =" + byte_datum);
                    }
                    if (byte_datum >= BYTE_MISSING_VALUE) {
                        if (dbgLog.isLoggable(Level.FINER)) {
                            dbgLog.finer(i + "-th row " + columnCounter + "=th column byte MV=" + byte_datum);
                        }
                        dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                    } else {
                        dataRow[columnCounter] = byte_datum;
                    }

                    byte_offset++;
                    break;
                case -4:
                    // Stata-int (=java's short: 2byte) case
                    // note: 2-byte signed int, not java's int
                    ByteBuffer int_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 2);
                    if (isLittleEndian) {
                        int_buffer.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    short short_datum = int_buffer.getShort();

                    if (dbgLog.isLoggable(Level.FINER)) {
                        dbgLog.finer(i + "-th row " + columnCounter + "=th column stata int =" + short_datum);
                    }
                    if (short_datum >= INT_MISSIG_VALUE) {
                        if (dbgLog.isLoggable(Level.FINER)) {
                            dbgLog.finer(i + "-th row " + columnCounter + "=th column stata long missing value="
                                    + short_datum);
                        }
                        dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                    } else {
                        if (isDateTimeDatum) {
                            DecodedDateTime ddt = decodeDateTimeData("short", variableFormat,
                                    Short.toString(short_datum));
                            if (dbgLog.isLoggable(Level.FINER)) {
                                dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format="
                                        + ddt.format);
                            }
                            dataRow[columnCounter] = ddt.decodedDateTime;
                            dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);
                        } else {
                            dataRow[columnCounter] = short_datum;
                        }
                    }
                    byte_offset += 2;
                    break;
                case -3:
                    // stata-Long (= java's int: 4 byte) case
                    // note: 4-byte signed, not java's long
                    ByteBuffer long_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 4);
                    if (isLittleEndian) {
                        long_buffer.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    int int_datum = long_buffer.getInt();

                    if (int_datum >= LONG_MISSING_VALUE) {
                        dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                    } else {
                        if (isDateTimeDatum) {
                            DecodedDateTime ddt = decodeDateTimeData("int", variableFormat,
                                    Integer.toString(int_datum));
                            if (dbgLog.isLoggable(Level.FINER)) {
                                dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format="
                                        + ddt.format);
                            }
                            dataRow[columnCounter] = ddt.decodedDateTime;
                            dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);
                        } else {
                            dataRow[columnCounter] = int_datum;
                        }
                    }
                    byte_offset += 4;
                    break;
                case -2:
                    // float case
                    // note: 4-byte
                    ByteBuffer float_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 4);
                    if (isLittleEndian) {
                        float_buffer.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    float float_datum = float_buffer.getFloat();

                    if (dbgLog.isLoggable(Level.FINER)) {
                        dbgLog.finer(i + "-th row " + columnCounter + "=th column float =" + float_datum);
                    }
                    if (FLOAT_MISSING_VALUE_SET.contains(float_datum)) {
                        if (dbgLog.isLoggable(Level.FINER)) {
                            dbgLog.finer(i + "-th row " + columnCounter + "=th column float missing value="
                                    + float_datum);
                        }
                        dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                    } else {
                        if (isDateTimeDatum) {
                            DecodedDateTime ddt = decodeDateTimeData("float", variableFormat,
                                    doubleNumberFormatter.format(float_datum));
                            if (dbgLog.isLoggable(Level.FINER)) {
                                dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format="
                                        + ddt.format);
                            }
                            dataRow[columnCounter] = ddt.decodedDateTime;
                            dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);
                        } else {
                            dataRow[columnCounter] = float_datum;
                            // This may be temporary - but for now (as in, while I'm testing
                            // 4.0 ingest against 3.* ingest, I need to be able to tell if a
                            // floating point value was a single, or double float in the
                            // original STATA file: -- L.A. Jul. 2014
                            dataTable.getDataVariables().get(columnCounter).setFormat("float");
                        }
                    }
                    byte_offset += 4;
                    break;
                case -1:
                    // double case
                    // note: 8-byte
                    ByteBuffer double_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 8);
                    if (isLittleEndian) {
                        double_buffer.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    double double_datum = double_buffer.getDouble();

                    if (DOUBLE_MISSING_VALUE_SET.contains(double_datum)) {
                        if (dbgLog.isLoggable(Level.FINER)) {
                            dbgLog.finer(i + "-th row " + columnCounter + "=th column double missing value="
                                    + double_datum);
                        }
                        dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                    } else {
                        if (isDateTimeDatum) {
                            DecodedDateTime ddt = decodeDateTimeData("double", variableFormat,
                                    doubleNumberFormatter.format(double_datum));
                            if (dbgLog.isLoggable(Level.FINER)) {
                                dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format="
                                        + ddt.format);
                            }
                            dataRow[columnCounter] = ddt.decodedDateTime;
                            dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);
                        } else {
                            dataRow[columnCounter] = doubleNumberFormatter.format(double_datum);
                        }
                    }
                    byte_offset += 8;
                    break;
                case 0:
                    // String case
                    int strVarLength = StringLengthTable.get(columnCounter);
                    String raw_datum = new String(
                            Arrays.copyOfRange(dataRowBytes, byte_offset, (byte_offset + strVarLength)),
                            "ISO-8859-1");
                    // TODO:
                    // is it the right thing to do, to default to "ISO-8859-1"?
                    // (it may be; since there's no mechanism for specifying
                    // alternative encodings in Stata, this may be their default;
                    // it just needs to be verified. -- L.A. Jul. 2014)
                    String string_datum = getNullStrippedString(raw_datum);
                    if (dbgLog.isLoggable(Level.FINER)) {
                        dbgLog.finer(i + "-th row " + columnCounter + "=th column string =" + string_datum);
                    }
                    if (string_datum.isEmpty()) {
                        if (dbgLog.isLoggable(Level.FINER)) {
                            dbgLog.finer(i + "-th row " + columnCounter + "=th column string missing value="
                                    + string_datum);
                        }
                        // TODO:
                        /* Is this really a missing value case?
                         * Or is it an honest empty string?
                         * Is there such a thing as a missing value for a String in Stata?
                         * -- L.A. 4.0
                         */
                        dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                    } else {
                        /*
                         * Some special characters, like new lines and tabs need to
                         * be escaped - otherwise they will break our TAB file
                         * structure!
                         * But before we escape anything, all the back slashes
                         * already in the string need to be escaped themselves.
                         * (These are all literal, non-regex replacements.)
                         */
                        String escapedString = string_datum.replace("\\", "\\\\");
                        // escape quotes:
                        escapedString = escapedString.replace("\"", "\\\"");
                        // escape tabs and new lines:
                        escapedString = escapedString.replace("\t", "\\t");
                        escapedString = escapedString.replace("\n", "\\n");
                        escapedString = escapedString.replace("\r", "\\r");
                        // the escaped version of the string is stored in the tab file
                        // enclosed in double-quotes; this is in order to be able
                        // to differentiate between an empty string (tab-delimited empty string in
                        // double quotes) and a missing value (tab-delimited empty string).
                        // Although the question still remains - is it even possible
                        // to store an empty string, that's not a missing value, in Stata?
                        // - see the comment in the missing value case above. -- L.A. 4.0
                        dataRow[columnCounter] = "\"" + escapedString + "\"";
                    }
                    byte_offset += strVarLength;
                    break;
                default:
                    dbgLog.fine("unknown variable type found");
                    String errorMessage = "unknow variable Type found at data section";
                    throw new InvalidObjectException(errorMessage);
                } // switch
            } // for-columnCounter

            // Dump the row of data to the tab-delimited file we are producing:
            pwout.println(StringUtils.join(dataRow, "\t"));

        } // for- i (row)
    } finally {
        if (pwout != null) {
            // closing the writer also closes the underlying file stream
            pwout.close();
        } else {
            fileOutTab.close();
        }
    }

    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("variableTypes:\n" + Arrays.deepToString(variableTypes));
    }

    dbgLog.fine("DTA Ingest: decodeData(): end.");

}

From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.dta.DTAFileReader.java

/**
 * Calculates the UNF of a single variable (column) and, as a side effect,
 * stores the variable's summary statistics and category statistics in
 * {@code smd}.
 *
 * <p>The type code that {@code variableTypeMap} holds for
 * {@code variableType} selects how the values are interpreted: {@code -5}
 * Byte, {@code -4} Short, {@code -3} Integer, {@code -2} Float,
 * {@code -1} Double, {@code 0} String. Every element of {@code varData}
 * is cast to the selected type. For the float and double cases the
 * category statistics are only calculated when
 * {@code valueLabelSchemeMappingTable} has an entry for the variable;
 * otherwise {@code null} is stored.
 *
 * @param varData          the values of the variable, one per observation
 * @param dateFormat       date formats handed to the UNF calculation; only
 *                         used in the String case
 * @param variableType     key into {@code variableTypeMap}
 * @param unfVersionNumber only written to the debug log by this method
 * @param variablePosition index of the variable; used as the key of the
 *                         summary statistics table and to look up the
 *                         variable's name in {@code variableNameList}
 * @return the UNF value calculated by {@code UNF5Util.calculateUNF}
 * @throws IllegalArgumentException if the variable type is not one of the
 *         type codes listed above
 * @throws ClassCastException if an element of {@code varData} is not of
 *         the type selected by {@code variableType}
 * @throws IOException declared for the calls this method makes
 */
private String getUNF(Object[] varData, String[] dateFormat, String variableType, String unfVersionNumber,
        int variablePosition) throws IOException {
    String unfValue = null;
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine(variablePosition + "-th varData:\n" + Arrays.deepToString(varData));
        dbgLog.fine("variableType=" + variableType);
        dbgLog.fine("unfVersionNumber=" + unfVersionNumber);
    }
    Integer var_Type = variableTypeMap.get(variableType);
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("var_Type=" + var_Type);
    }
    Map<String, Integer> catStat = null;
    // 256 is not a valid type code; it routes unmapped types to the default case
    switch (var_Type != null ? var_Type : 256) {
    case -5:
        // Byte case
        dbgLog.fine("byte case");

        Byte[] bdata = new Byte[varData.length];
        for (int i = 0; i < varData.length; i++) {
            bdata[i] = (Byte) varData[i];
        }

        unfValue = UNF5Util.calculateUNF(bdata);

        smd.getSummaryStatisticsTable().put(variablePosition,
                ArrayUtils.toObject(StatHelper.calculateSummaryStatistics(bdata)));

        catStat = StatHelper.calculateCategoryStatistics(bdata);
        smd.getCategoryStatisticsTable().put(variableNameList.get(variablePosition), catStat);

        break;
    case -4:
        // Stata-int (=java's short: 2byte) case
        dbgLog.fine("stata int case");
        // note: 2-byte signed int, not java's int

        Short[] sdata = new Short[varData.length];
        for (int i = 0; i < varData.length; i++) {
            sdata[i] = (Short) varData[i];
        }
        unfValue = UNF5Util.calculateUNF(sdata);

        smd.getSummaryStatisticsTable().put(variablePosition,
                ArrayUtils.toObject(StatHelper.calculateSummaryStatistics(sdata)));

        catStat = StatHelper.calculateCategoryStatistics(sdata);
        smd.getCategoryStatisticsTable().put(variableNameList.get(variablePosition), catStat);

        break;
    case -3:
        // stata-Long (= java's int: 4 byte) case
        dbgLog.fine("stata long case");
        // note: 4-byte signed, not java's long

        Integer[] idata = new Integer[varData.length];
        for (int i = 0; i < varData.length; i++) {
            idata[i] = (Integer) varData[i];
        }
        unfValue = UNF5Util.calculateUNF(idata);

        smd.getSummaryStatisticsTable().put(variablePosition,
                ArrayUtils.toObject(StatHelper.calculateSummaryStatistics(idata)));

        catStat = StatHelper.calculateCategoryStatistics(idata);
        smd.getCategoryStatisticsTable().put(variableNameList.get(variablePosition), catStat);
        break;
    case -2:
        // float case
        dbgLog.fine("float case");
        // note: 4-byte
        Float[] fdata = new Float[varData.length];
        for (int i = 0; i < varData.length; i++) {
            fdata[i] = (Float) varData[i];
        }

        unfValue = UNF5Util.calculateUNF(fdata);

        smd.getSummaryStatisticsTable().put(variablePosition,
                ArrayUtils.toObject(StatHelper.calculateSummaryStatisticsContDistSample(fdata)));

        // category statistics only for variables that have value labels;
        // otherwise catStat is still null here and null is what gets stored
        if (valueLabelSchemeMappingTable.containsKey(variableNameList.get(variablePosition))) {
            catStat = StatHelper.calculateCategoryStatistics(fdata);
        }
        smd.getCategoryStatisticsTable().put(variableNameList.get(variablePosition), catStat);

        break;
    case -1:
        // double case
        dbgLog.fine("double case");
        // note: 8-byte

        Double[] ddata = new Double[varData.length];
        for (int i = 0; i < varData.length; i++) {
            ddata[i] = (Double) varData[i];
        }

        unfValue = UNF5Util.calculateUNF(ddata);

        smd.getSummaryStatisticsTable().put(variablePosition,
                ArrayUtils.toObject(StatHelper.calculateSummaryStatisticsContDistSample(ddata)));

        // category statistics only for variables that have value labels;
        // otherwise catStat is still null here and null is what gets stored
        if (valueLabelSchemeMappingTable.containsKey(variableNameList.get(variablePosition))) {
            catStat = StatHelper.calculateCategoryStatistics(ddata);
        }
        smd.getCategoryStatisticsTable().put(variableNameList.get(variablePosition), catStat);

        break;
    case 0:
        // String case
        dbgLog.fine("string case");
        String[] strdata = new String[varData.length];
        for (int i = 0; i < varData.length; i++) {
            strdata[i] = (String) varData[i];
        }
        // Rendering a whole column as a string is expensive; only do it
        // when the output is actually going to be logged.
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("strdata=" + Arrays.deepToString(strdata));
            dbgLog.fine("dateFormats: " + Arrays.deepToString(dateFormat));
        }
        unfValue = UNF5Util.calculateUNF(strdata, dateFormat);

        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("UNF = " + unfValue);
            dbgLog.fine("string:unfValue" + unfValue);
        }
        // Should summary statistics be calculated on dates?
        smd.getSummaryStatisticsTable().put(variablePosition, StatHelper.calculateSummaryStatistics(strdata));

        Map<String, Integer> StrCatStat = StatHelper.calculateCategoryStatistics(strdata);

        smd.getCategoryStatisticsTable().put(variableNameList.get(variablePosition), StrCatStat);

        break;
    default:
        dbgLog.fine("unknown variable type found");
        String errorMessage = "unknow variable Type found at varData section";
        throw new IllegalArgumentException(errorMessage);

    } // switch
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("unfvalue(last)=" + unfValue);
    }
    return unfValue;
}

From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.sav.SAVFileReader.java

/**
 * Decodes the data section of the file and calculates the UNF values of
 * the decoded data.
 *
 * <p>The actual decoding is delegated to
 * {@code decodeRecordTypeDataCompressed} or
 * {@code decodeRecordTypeDataUnCompressed}, depending on
 * {@code isDataSectionCompressed}. Afterwards a UNF is calculated for each
 * of the {@code varQnty} variables from {@code dataTable2} and
 * {@code dateFormats}, and a file-level UNF is calculated from the
 * per-variable values. The results, together with {@code dataTable2}, are
 * stored in {@code savDataSection} and {@code smd}.
 *
 * <p>A failure to calculate the UNF of a variable (other than an
 * {@link IOException}) is logged and leaves that variable's entry
 * {@code null}; processing continues with the next variable.
 *
 * @param stream the input to decode
 * @throws IllegalArgumentException if {@code stream} is {@code null}
 * @throws IOException if thrown while decoding the data or calculating a UNF
 */
void decodeRecordTypeData(BufferedInputStream stream) throws IOException {
    dbgLog.fine("***** decodeRecordTypeData(): start *****");

    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }
    if (isDataSectionCompressed) {
        decodeRecordTypeDataCompressed(stream);
    } else {
        decodeRecordTypeDataUnCompressed(stream);
    }

    String[] unfValues = new String[varQnty];

    dbgLog.fine("variableTypeFinal:" + Arrays.toString(variableTypeFinal));

    // The per-variable dumps render a whole column each; only build them
    // when they are actually going to be logged.
    boolean logColumns = dbgLog.isLoggable(java.util.logging.Level.FINER);

    for (int j = 0; j < varQnty; j++) {
        int variableTypeNumer = variableTypeFinal[j];
        try {
            if (logColumns) {
                dbgLog.finer("j = " + j);
                dbgLog.finer("dataTable2[j] = " + Arrays.deepToString(dataTable2[j]));
                dbgLog.finer("dateFormats[j] = " + Arrays.deepToString(dateFormats[j]));
            }
            unfValues[j] = getUNF(dataTable2[j], dateFormats[j], variableTypeNumer, unfVersionNumber, j);
            dbgLog.fine(j + "th unf value" + unfValues[j]);

        } catch (NumberFormatException ex) {
            dbgLog.log(java.util.logging.Level.WARNING,
                    "failed to calculate the UNF of the " + j + "th variable", ex);
        } catch (UnfException ex) {
            dbgLog.log(java.util.logging.Level.WARNING,
                    "failed to calculate the UNF of the " + j + "th variable", ex);
        } catch (NoSuchAlgorithmException ex) {
            dbgLog.log(java.util.logging.Level.WARNING,
                    "failed to calculate the UNF of the " + j + "th variable", ex);
        }
        // an IOException is not caught here: it propagates to the caller
    }

    dbgLog.fine("unf set:\n" + Arrays.deepToString(unfValues));

    String fileUnfValue = null;
    try {
        fileUnfValue = UNF5Util.calculateUNF(unfValues);
    } catch (NumberFormatException ex) {
        dbgLog.log(java.util.logging.Level.WARNING, "failed to calculate the file-level UNF", ex);
    }

    dbgLog.fine("file-unf=" + fileUnfValue);

    savDataSection.setUnf(unfValues);

    savDataSection.setFileUnf(fileUnfValue);

    smd.setVariableUNF(unfValues);

    smd.getFileInformation().put("fileUNF", fileUnfValue);

    // Arrays.toString: concatenating the array itself would only log its identity
    dbgLog.fine("unf values:\n" + Arrays.toString(unfValues));

    savDataSection.setData(dataTable2);
    if (dbgLog.isLoggable(java.util.logging.Level.FINE)) {
        // renders the entire data table; skipped unless FINE logging is enabled
        dbgLog.fine("dataTable2:\n" + Arrays.deepToString(dataTable2));
    }

    dbgLog.fine("***** decodeRecordTypeData(): end *****");
}

From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.sav.SAVFileReader.java

void decodeRecordTypeDataCompressed(BufferedInputStream stream) throws IOException {

    dbgLog.fine("***** decodeRecordTypeDataCompressed(): start *****");

    if (stream == null) {
        throw new IllegalArgumentException("decodeRecordTypeDataCompressed: stream == null!");
    }//from   ww w.  j a  va2s  .com
    Map<String, String> formatCategoryTable = new LinkedHashMap<String, String>();

    PrintWriter pwout = createOutputWriter(stream);

    boolean hasStringVarContinuousBlock = obsNonVariableBlockSet.size() > 0 ? true : false;
    dbgLog.fine("hasStringVarContinuousBlock=" + hasStringVarContinuousBlock);

    int ii = 0;

    int OBS = LENGTH_SAV_OBS_BLOCK;
    int nOBS = OBSUnitsPerCase;

    dbgLog.fine("OBSUnitsPerCase=" + OBSUnitsPerCase);

    int caseIndex = 0;

    dbgLog.fine("printFormatTable:\n" + printFormatTable);

    dbgLog.fine("printFormatNameTable:\n" + printFormatNameTable);
    variableFormatTypeList = new String[varQnty];

    dbgLog.fine("varQnty: " + varQnty);

    for (int i = 0; i < varQnty; i++) {
        variableFormatTypeList[i] = SPSSConstants.FORMAT_CATEGORY_TABLE
                .get(printFormatTable.get(variableNameList.get(i)));
        dbgLog.fine("i=" + i + "th variableFormatTypeList=" + variableFormatTypeList[i]);
        formatCategoryTable.put(variableNameList.get(i), variableFormatTypeList[i]);
    }
    dbgLog.fine("variableFormatType:\n" + Arrays.deepToString(variableFormatTypeList));
    dbgLog.fine("formatCategoryTable:\n" + formatCategoryTable);

    // contents (variable) checker concering decimals
    variableTypeFinal = new int[varQnty];
    Arrays.fill(variableTypeFinal, 0);

    List<String> casewiseRecordForUNF = new ArrayList<String>();
    String[] caseWiseDateFormatForUNF = null;
    List<String> casewiseRecordForTabFile = new ArrayList<String>();

    // missing values are written to the tab-delimited file by
    // using the default or user-specified missing-value  strings;
    // however, to calculate UNF/summary statistics,
    // classes for these calculations require their specific 
    // missing values that differ from the above missing-value
    // strings; therefore, after row data for the tab-delimited 
    // file are written, missing values in a row are changed to
    // UNF/summary-statistics-OK ones.

    // data-storage object for sumStat
    dataTable2 = new Object[varQnty][caseQnty];
    // storage of date formats to pass to UNF
    dateFormats = new String[varQnty][caseQnty];

    try {
        // this compression is applied only to non-float data, i.e. integer;
        // 8-byte float datum is kept in tact
        boolean hasReachedEOF = false;

        OBSERVATION: while (true) {

            dbgLog.fine("SAV Reader: compressed: ii=" + ii + "-th iteration");

            byte[] octate = new byte[LENGTH_SAV_OBS_BLOCK];

            int nbytes = stream.read(octate);

            // processCompressedOBSblock ()

            // (this means process a block of 8 compressed OBS
            // values -- should result in 64 bytes of data total)

            for (int i = 0; i < LENGTH_SAV_OBS_BLOCK; i++) {

                dbgLog.finer("i=" + i + "-th iteration");
                int octate_i = octate[i];
                //dbgLog.fine("octate="+octate_i);
                if (octate_i < 0) {
                    octate_i += 256;
                }
                int byteCode = octate_i;//octate_i & 0xF;
                //out.println("byeCode="+byteCode);

                // processCompressedOBS

                switch (byteCode) {
                case 252:
                    // end of the file
                    dbgLog.fine("SAV Reader: compressed: end of file mark [FC] was found");
                    hasReachedEOF = true;
                    break;
                case 253:
                    // FD: uncompressed data follows after this octate
                    // long string datum or float datum
                    // read the following octate
                    byte[] uncompressedByte = new byte[LENGTH_SAV_OBS_BLOCK];
                    int ucbytes = stream.read(uncompressedByte);
                    int typeIndex = (ii * OBS + i) % nOBS;

                    if ((OBSwiseTypelList.get(typeIndex) > 0) || (OBSwiseTypelList.get(typeIndex) == -1)) {
                        // code= >0 |-1: string or its conitiguous block
                        // decode as a string object
                        String strdatum = new String(
                                Arrays.copyOfRange(uncompressedByte, 0, LENGTH_SAV_OBS_BLOCK), defaultCharSet);
                        //out.println("str_datum="+strdatum+"<-");
                        // add this non-missing-value string datum
                        casewiseRecordForTabFile.add(strdatum);
                        //out.println("casewiseRecordForTabFile(String)="+casewiseRecordForTabFile);
                    } else if (OBSwiseTypelList.get(typeIndex) == -2) {
                        String strdatum = new String(
                                Arrays.copyOfRange(uncompressedByte, 0, LENGTH_SAV_OBS_BLOCK - 1),
                                defaultCharSet);
                        casewiseRecordForTabFile.add(strdatum);
                        //out.println("casewiseRecordForTabFile(String)="+casewiseRecordForTabFile);
                    } else if (OBSwiseTypelList.get(typeIndex) == 0) {
                        // code= 0: numeric

                        ByteBuffer bb_double = ByteBuffer.wrap(uncompressedByte, 0, LENGTH_SAV_OBS_BLOCK);
                        if (isLittleEndian) {
                            bb_double.order(ByteOrder.LITTLE_ENDIAN);
                        }

                        Double ddatum = bb_double.getDouble();
                        // out.println("ddatum="+ddatum);
                        // add this non-missing-value numeric datum
                        casewiseRecordForTabFile.add(doubleNumberFormatter.format(ddatum));
                        dbgLog.fine("SAV Reader: compressed: added value to dataLine: " + ddatum);

                    } else {
                        dbgLog.fine("SAV Reader: out-of-range exception");
                        throw new IOException("out-of-range value was found");
                    }

                    /*
                    // EOF-check after reading this octate
                    if (stream.available() == 0){
                    hasReachedEOF = true;
                    dbgLog.fine(
                    "SAV Reader: *** After reading an uncompressed octate," +
                    " reached the end of the file at "+ii
                    +"th iteration and i="+i+"th octate position [0-start] *****");
                    }
                     */

                    break;
                case 254:
                    // FE: used as the missing value for string variables
                    // an empty case in a string variable also takes this value
                    // string variable does not accept space-only data
                    // cf: uncompressed case
                    // 20 20 20 20 20 20 20 20
                    // add the string missing value
                    // out.println("254: String missing data");

                    casewiseRecordForTabFile.add(" "); // add "." here?

                    // Note that technically this byte flag (254/xFE) means
                    // that *eight* white space characters should be
                    // written to the output stream. This caused me
                    // a great amount of confusion, because it appeared
                    // to me that there was a mismatch between the number
                    // of bytes advertised in the variable metadata and
                    // the number of bytes actually found in the data
                    // section of a compressed SAV file; this is because
                    // these 8 bytes "come out of nowhere"; they are not
                    // written in the data section, but this flag specifies
                    // that they should be added to the output.
                    // Also, as I pointed out above, we are only writing
                    // out one whitespace character, not 8 as instructed.
                    // This appears to be legit; these blocks of 8 spaces
                    // seem to be only used for padding, and all such
                    // multiple padding spaces are stripped anyway during
                    // the post-processing.

                    break;
                case 255:
                    // FF: system missing value for numeric variables
                    // cf: uncompressed case (sysmis)
                    // FF FF FF FF FF FF EF FF (little endian)
                    // add the numeric missing value
                    dbgLog.fine("SAV Reader: compressed: Missing Value, numeric");
                    casewiseRecordForTabFile.add(MissingValueForTextDataFileNumeric);

                    break;
                case 0:
                    // 00: do nothing
                    dbgLog.fine("SAV Reader: compressed: doing nothing (zero); ");

                    break;
                default:
                    //out.println("byte code(default)="+ byteCode);
                    if ((byteCode > 0) && (byteCode < 252)) {
                        // datum is compressed
                        //Integer unCompressed = Integer.valueOf(byteCode -100);
                        // add this uncompressed numeric datum
                        Double unCompressed = Double.valueOf(byteCode - 100);
                        dbgLog.fine("SAV Reader: compressed: default case: " + unCompressed);

                        casewiseRecordForTabFile.add(doubleNumberFormatter.format(unCompressed));
                        // out.println("uncompressed="+unCompressed);
                        // out.println("dataline="+casewiseRecordForTabFile);
                    }
                }// end of switch

                // out.println("end of switch");

                // The-end-of-a-case(row)-processing

                // this line that follows, and the code around it
                // is really confusing:
                int varCounter = (ii * OBS + i + 1) % nOBS;
                // while both OBS and LENGTH_SAV_OBS_BLOCK = 8
                // (OBS was initialized as OBS=LENGTH_SAV_OBS_BLOCK),
                // the 2 values mean different things:
                // LENGTH_SAV_OBS_BLOCK is the number of bytes in one OBS;
                // and OBS is the number of OBS blocks that we process
                // at a time. I.e., we process 8 chunks of 8 bytes at a time.
                // This is how data is organized inside an SAV file:
                // 8 bytes of compression flags, followed by 8x8 or fewer
                // (depending on the flags) bytes of compressed data.
                // I should rename this OBS variable something more
                // meaningful.
                //
                // Also, the "varCounter" variable name is entirely
                // misleading -- it counts not variables, but OBS blocks.

                dbgLog.fine("SAV Reader: compressed: OBS counter=" + varCounter + "(ii=" + ii + ")");

                if ((ii * OBS + i + 1) % nOBS == 0) {

                    //out.println("casewiseRecordForTabFile(before)="+casewiseRecordForTabFile);

                    // out.println("all variables in a case are parsed == nOBS");
                    // out.println("hasStringVarContinuousBlock="+hasStringVarContinuousBlock);

                    // check whether a string-variable's continuous block exists
                    // if so, they must be joined

                    if (hasStringVarContinuousBlock) {

                        // string-variable's continuous-block-concatenating-processing

                        //out.println("concatenating process starts");
                        //out.println("casewiseRecordForTabFile(before)="+casewiseRecordForTabFile);
                        //out.println("casewiseRecordForTabFile(before:size)="+casewiseRecordForTabFile.size());

                        StringBuilder sb = new StringBuilder("");
                        int firstPosition = 0;

                        Set<Integer> removeJset = new HashSet<Integer>();
                        for (int j = 0; j < nOBS; j++) {
                            dbgLog.fine("RTD: j=" + j + "-th type =" + OBSwiseTypelList.get(j));
                            if ((OBSwiseTypelList.get(j) == -1) || (OBSwiseTypelList.get(j) == -2)) {
                                // Continued String variable found at j-th
                                // position. look back the j-1
                                firstPosition = j - 1;
                                int lastJ = j;
                                String concatenated = null;

                                removeJset.add(j);
                                sb.append(casewiseRecordForTabFile.get(j - 1));
                                sb.append(casewiseRecordForTabFile.get(j));

                                for (int jc = 1;; jc++) {
                                    if ((j + jc == nOBS) || ((OBSwiseTypelList.get(j + jc) != -1)
                                            && (OBSwiseTypelList.get(j + jc) != -2))) {

                                        // j is the end unit of this string variable
                                        concatenated = sb.toString();
                                        sb.setLength(0);
                                        lastJ = j + jc;
                                        break;
                                    } else {
                                        sb.append(casewiseRecordForTabFile.get(j + jc));
                                        removeJset.add(j + jc);
                                    }
                                }
                                casewiseRecordForTabFile.set(j - 1, concatenated);

                                //out.println(j-1+"th concatenated="+concatenated);
                                j = lastJ - 1;

                            } // end-of-if: continuous-OBS only

                        } // end of loop-j

                        //out.println("removeJset="+removeJset);

                        // a new list that stores a new case with concatenated string data
                        List<String> newDataLine = new ArrayList<String>();

                        for (int jl = 0; jl < casewiseRecordForTabFile.size(); jl++) {
                            //out.println("jl="+jl+"-th datum =["+casewiseRecordForTabFile.get(jl)+"]");

                            if (!removeJset.contains(jl)) {

                                //                                if (casewiseRecordForTabFile.get(jl).equals(MissingValueForTextDataFileString)){
                                //                                    out.println("NA-S jl= "+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
                                //                                } else if (casewiseRecordForTabFile.get(jl).equals(MissingValueForTextDataFileNumeric)){
                                //                                    out.println("NA-N jl= "+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
                                //                                } else if (casewiseRecordForTabFile.get(jl)==null){
                                //                                    out.println("null case jl="+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
                                //                                } else if (casewiseRecordForTabFile.get(jl).equals("NaN")){
                                //                                    out.println("NaN jl= "+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
                                //                                } else if (casewiseRecordForTabFile.get(jl).equals("")){
                                //                                    out.println("blank jl= "+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
                                //                                } else if (casewiseRecordForTabFile.get(jl).equals(" ")){
                                //                                    out.println("space jl= "+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
                                //                                }

                                newDataLine.add(casewiseRecordForTabFile.get(jl));
                            } else {
                                //                                out.println("Excluded: jl="+jl+"-th datum=["+casewiseRecordForTabFile.get(jl)+"]");
                            }
                        } // end of loop-jl

                        //out.println("new casewiseRecordForTabFile="+newDataLine);
                        //out.println("new casewiseRecordForTabFile(size)="+newDataLine.size());

                        casewiseRecordForTabFile = newDataLine;

                    } // end-if: stringContinuousVar-exist case

                    for (int el = 0; el < casewiseRecordForTabFile.size(); el++) {
                        casewiseRecordForUNF.add(casewiseRecordForTabFile.get(el));
                    }

                    caseWiseDateFormatForUNF = new String[casewiseRecordForTabFile.size()];

                    // caseIndex starts from 1 not 0
                    caseIndex = (ii * OBS + i + 1) / nOBS;

                    for (int k = 0; k < casewiseRecordForTabFile.size(); k++) {

                        dbgLog.fine("k=" + k + "-th variableTypelList=" + variableTypelList.get(k));

                        if (variableTypelList.get(k) > 0) {
                            // String variable case: set to  -1
                            variableTypeFinal[k] = -1;

                            // Strip the String variables off the
                            // whitespace padding:

                            // [ snipped ]

                            // I've removed the block of code above where
                            // String values were substring()-ed to the
                            // length specified in the variable metadata;
                            // Doing that was not enough, since a string
                            // can still be space-padded inside its
                            // advertised capacity. (note that extended
                            // variables can have many kilobytes of such
                            // padding in them!) Plus it was completely
                            // redundant, since we are stripping all the
                            // trailing white spaces with
                            // StringUtils.stripEnd() below:

                            String paddRemoved = StringUtils
                                    .stripEnd(casewiseRecordForTabFile.get(k).toString(), null);
                            // TODO: clean this up.  For now, just make sure that strings contain at least one blank space.
                            if (paddRemoved.equals("")) {
                                paddRemoved = " ";
                            }

                            casewiseRecordForUNF.set(k, paddRemoved);
                            casewiseRecordForTabFile.set(k, "\""
                                    + paddRemoved.replaceAll("\"", Matcher.quoteReplacement("\\\"")) + "\"");

                            // end of String var case

                        } else {
                            // numeric var case
                            if (casewiseRecordForTabFile.get(k).equals(MissingValueForTextDataFileNumeric)) {
                                casewiseRecordForUNF.set(k, null);

                            }

                        } // end of variable-type check

                        if (casewiseRecordForTabFile.get(k) != null && !casewiseRecordForTabFile.get(k)
                                .equals(MissingValueForTextDataFileNumeric)) {

                            String variableFormatType = variableFormatTypeList[k];
                            dbgLog.finer("k=" + k + "th printFormatTable format="
                                    + printFormatTable.get(variableNameList.get(k)));

                            int formatDecimalPointPosition = formatDecimalPointPositionList.get(k);

                            if (variableFormatType.equals("date")) {
                                dbgLog.finer("date case");

                                long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString())
                                        * 1000L - SPSS_DATE_OFFSET;

                                String newDatum = sdf_ymd.format(new Date(dateDatum));
                                dbgLog.finer("k=" + k + ":" + newDatum);
                                caseWiseDateFormatForUNF[k] = sdf_ymd.toPattern();
                                /* saving date format */
                                dbgLog.finer("setting caseWiseDateFormatForUNF[k] = " + sdf_ymd.toPattern());
                                casewiseRecordForTabFile.set(k, newDatum);
                                casewiseRecordForUNF.set(k, newDatum);
                                //formatCategoryTable.put(variableNameList.get(k), "date");
                            } else if (variableFormatType.equals("time")) {
                                dbgLog.finer("time case:DTIME or DATETIME or TIME");
                                //formatCategoryTable.put(variableNameList.get(k), "time");

                                if (printFormatTable.get(variableNameList.get(k)).equals("DTIME")) {

                                    if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                        long dateDatum = Long
                                                .parseLong(casewiseRecordForTabFile.get(k).toString()) * 1000L
                                                - SPSS_DATE_BIAS;
                                        String newDatum = sdf_dhms.format(new Date(dateDatum));
                                        dbgLog.finer("k=" + k + ":" + newDatum);
                                        casewiseRecordForTabFile.set(k, newDatum);
                                        casewiseRecordForUNF.set(k, newDatum);
                                    } else {
                                        // decimal point included
                                        String[] timeData = casewiseRecordForTabFile.get(k).toString()
                                                .split("\\.");

                                        dbgLog.finer(StringUtils.join(timeData, "|"));
                                        long dateDatum = Long.parseLong(timeData[0]) * 1000L - SPSS_DATE_BIAS;
                                        StringBuilder sb_time = new StringBuilder(
                                                sdf_dhms.format(new Date(dateDatum)));
                                        dbgLog.finer(sb_time.toString());

                                        if (formatDecimalPointPosition > 0) {
                                            sb_time.append(
                                                    "." + timeData[1].substring(0, formatDecimalPointPosition));
                                        }

                                        dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                        casewiseRecordForTabFile.set(k, sb_time.toString());
                                        casewiseRecordForUNF.set(k, sb_time.toString());
                                    }
                                } else if (printFormatTable.get(variableNameList.get(k)).equals("DATETIME")) {

                                    if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                        long dateDatum = Long
                                                .parseLong(casewiseRecordForTabFile.get(k).toString()) * 1000L
                                                - SPSS_DATE_OFFSET;
                                        String newDatum = sdf_ymdhms.format(new Date(dateDatum));
                                        dbgLog.finer("k=" + k + ":" + newDatum);
                                        caseWiseDateFormatForUNF[k] = sdf_ymdhms.toPattern();
                                        casewiseRecordForTabFile.set(k, newDatum);
                                        casewiseRecordForUNF.set(k, newDatum);
                                    } else {
                                        // decimal point included
                                        String[] timeData = casewiseRecordForTabFile.get(k).toString()
                                                .split("\\.");

                                        //dbgLog.finer(StringUtils.join(timeData, "|"));
                                        long dateDatum = Long.parseLong(timeData[0]) * 1000L - SPSS_DATE_OFFSET;
                                        StringBuilder sb_time = new StringBuilder(
                                                sdf_ymdhms.format(new Date(dateDatum)));
                                        //dbgLog.finer(sb_time.toString());

                                        if (formatDecimalPointPosition > 0) {
                                            sb_time.append(
                                                    "." + timeData[1].substring(0, formatDecimalPointPosition));
                                        }
                                        caseWiseDateFormatForUNF[k] = sdf_ymdhms.toPattern()
                                                + (formatDecimalPointPosition > 0 ? ".S" : "");
                                        dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                        casewiseRecordForTabFile.set(k, sb_time.toString());
                                        casewiseRecordForUNF.set(k, sb_time.toString());
                                    }
                                } else if (printFormatTable.get(variableNameList.get(k)).equals("TIME")) {
                                    if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                        long dateDatum = Long
                                                .parseLong(casewiseRecordForTabFile.get(k).toString()) * 1000L;
                                        String newDatum = sdf_hms.format(new Date(dateDatum));
                                        caseWiseDateFormatForUNF[k] = sdf_hms.toPattern();
                                        dbgLog.finer("k=" + k + ":" + newDatum);
                                        casewiseRecordForTabFile.set(k, newDatum);
                                        casewiseRecordForUNF.set(k, newDatum);
                                    } else {
                                        // decimal point included
                                        String[] timeData = casewiseRecordForTabFile.get(k).toString()
                                                .split("\\.");

                                        //dbgLog.finer(StringUtils.join(timeData, "|"));
                                        long dateDatum = Long.parseLong(timeData[0]) * 1000L;
                                        StringBuilder sb_time = new StringBuilder(
                                                sdf_hms.format(new Date(dateDatum)));
                                        //dbgLog.finer(sb_time.toString());

                                        if (formatDecimalPointPosition > 0) {
                                            sb_time.append(
                                                    "." + timeData[1].substring(0, formatDecimalPointPosition));
                                        }
                                        caseWiseDateFormatForUNF[k] = this.sdf_hms.toPattern()
                                                + (formatDecimalPointPosition > 0 ? ".S" : "");
                                        dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                        casewiseRecordForTabFile.set(k, sb_time.toString());
                                        casewiseRecordForUNF.set(k, sb_time.toString());
                                    }
                                }

                            } else if (variableFormatType.equals("other")) {
                                dbgLog.finer("other non-date/time case:=" + i);

                                if (printFormatTable.get(variableNameList.get(k)).equals("WKDAY")) {
                                    // day of week
                                    dbgLog.finer("data k=" + k + ":" + casewiseRecordForTabFile.get(k));
                                    dbgLog.finer("data k=" + k + ":" + SPSSConstants.WEEKDAY_LIST.get(
                                            Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1));
                                    String newDatum = SPSSConstants.WEEKDAY_LIST.get(
                                            Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1);
                                    casewiseRecordForTabFile.set(k, newDatum);
                                    casewiseRecordForUNF.set(k, newDatum);
                                    dbgLog.finer("wkday:k=" + k + ":" + casewiseRecordForTabFile.get(k));
                                } else if (printFormatTable.get(variableNameList.get(k)).equals("MONTH")) {
                                    // month
                                    dbgLog.finer("data k=" + k + ":" + casewiseRecordForTabFile.get(k));
                                    dbgLog.finer("data k=" + k + ":" + SPSSConstants.MONTH_LIST.get(
                                            Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1));
                                    String newDatum = SPSSConstants.MONTH_LIST.get(
                                            Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1);
                                    casewiseRecordForTabFile.set(k, newDatum);
                                    casewiseRecordForUNF.set(k, newDatum);
                                    dbgLog.finer("month:k=" + k + ":" + casewiseRecordForTabFile.get(k));
                                }
                            }

                        } // end: date-time-datum check

                    } // end: loop-k(2nd: variable-wise-check)

                    // write to tab file
                    if (casewiseRecordForTabFile.size() > 0) {
                        pwout.println(StringUtils.join(casewiseRecordForTabFile, "\t"));
                    }

                    if (casewiseRecordForTabFile.size() > 0) {
                        for (int ij = 0; ij < varQnty; ij++) {
                            dataTable2[ij][caseIndex - 1] = casewiseRecordForUNF.get(ij);

                            if (variableFormatTypeList[ij].equals("date")
                                    || variableFormatTypeList[ij].equals("time")) {
                                this.dateFormats[ij][caseIndex - 1] = caseWiseDateFormatForUNF[ij];
                            }
                        }
                    }

                    // numeric contents-check
                    for (int l = 0; l < casewiseRecordForTabFile.size(); l++) {

                        if (variableFormatTypeList[l].equals("date") || variableFormatTypeList[l].equals("time")
                                || printFormatTable.get(variableNameList.get(l)).equals("WKDAY")
                                || printFormatTable.get(variableNameList.get(l)).equals("MONTH")) {
                            variableTypeFinal[l] = -1;
                        }

                        if (variableTypeFinal[l] == 0) {
                            if (casewiseRecordForTabFile.get(l).toString().indexOf(".") >= 0) {
                                // TODO - check for large numbers
                                // l-th variable is not integer
                                variableTypeFinal[l] = 1;
                                decimalVariableSet.add(l);
                            }
                        }
                    }

                    // reset the case-wise working objects
                    casewiseRecordForUNF.clear();
                    casewiseRecordForTabFile.clear();

                    if (caseQnty > 0) {
                        if (caseIndex == caseQnty) {
                            hasReachedEOF = true;
                        }
                    }

                    if (hasReachedEOF) {
                        break;
                    }

                } // if(The-end-of-a-case(row)-processing)

            } // loop-i (OBS unit)

            if ((hasReachedEOF) || (stream.available() == 0)) {
                // reached the end of this file
                // do exit-processing

                dbgLog.fine("***** reached the end of the file at " + ii + "th iteration *****");

                break OBSERVATION;
            }

            ii++;

        } // while loop

        pwout.close();
    } catch (IOException ex) {
        throw ex;
    }

    smd.setDecimalVariables(decimalVariableSet);
    smd.getFileInformation().put("caseQnty", caseQnty);
    smd.setVariableFormatCategory(formatCategoryTable);
    dbgLog.info("<<<<<<");
    //dbgLog.info("printFormatList = " + printFormatList);
    //dbgLog.info("printFormatNameTable = " + printFormatNameTable);
    dbgLog.info("formatCategoryTable = " + formatCategoryTable);
    dbgLog.info(">>>>>>");

    // contents check
    //out.println("variableType="+ArrayUtils.toString(variableTypeFinal));
    dbgLog.fine("decimalVariableSet=" + decimalVariableSet);
    //out.println("variableTypelList=\n"+ variableTypelList.toString());

    // out.println("dataTable2:\n"+Arrays.deepToString(dataTable2));
    dbgLog.fine("***** decodeRecordTypeDataCompressed(): end *****");
}

From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SAVFileReader.java

/**
 * Decodes the compressed data section of an SAV file and writes each
 * completed case (row) to the tab-delimited output as one tab-joined line.
 *
 * <p>The stream is consumed in blocks of {@code LENGTH_SAV_OBS_BLOCK} flag
 * bytes. Each flag byte is interpreted as follows:
 * <ul>
 *   <li>{@code 0}: no-op;</li>
 *   <li>{@code 1..251}: a compressed numeric datum, value = code - 100;</li>
 *   <li>{@code 252}: end-of-file mark;</li>
 *   <li>{@code 253}: one uncompressed OBS follows in the stream; it is
 *       decoded as a string or a double depending on the OBS type;</li>
 *   <li>{@code 254}: missing/blank string datum (written as one space);</li>
 *   <li>{@code 255}: system-missing numeric datum.</li>
 * </ul>
 *
 * <p>Once {@code OBSUnitsPerCase} flag positions have been consumed, the
 * case is post-processed: continuation segments of long strings are joined,
 * string values are stripped of trailing whitespace and escaped, and
 * date / time / WKDAY / MONTH values are converted to their text form.
 *
 * <p>Side effects visible in this method: (re)initializes
 * {@code dateFormatList} and {@code variableFormatTypeList}, fills
 * {@code formatCategoryTable}, and adds to {@code decimalVariableSet} the
 * indices of numeric variables that contained a decimal point.
 *
 * <p>The output writer is always closed, including when decoding fails.
 *
 * @param stream the input positioned at the start of the compressed data
 * @throws IllegalArgumentException if {@code stream} is {@code null}
 * @throws IOException if reading fails or an OBS has an unknown type code
 */
void decodeRecordTypeDataCompressed(BufferedInputStream stream) throws IOException {

    dbgLog.fine("***** decodeRecordTypeDataCompressed(): start *****");

    if (stream == null) {
        throw new IllegalArgumentException("decodeRecordTypeDataCompressed: stream == null!");
    }

    PrintWriter pwout = createOutputWriter(stream);

    try {
        int varQnty = dataTable.getVarQuantity().intValue();
        int caseQnty = dataTable.getCaseQuantity().intValue();

        dbgLog.fine("varQnty: " + varQnty);

        dateFormatList = new String[varQnty];

        boolean hasStringVarContinuousBlock = obsNonVariableBlockSet.size() > 0;
        dbgLog.fine("hasStringVarContinuousBlock=" + hasStringVarContinuousBlock);

        // ii counts the blocks of compression flags processed so far
        int ii = 0;

        int OBS = LENGTH_SAV_OBS_BLOCK;
        int nOBS = OBSUnitsPerCase;

        dbgLog.fine("OBSUnitsPerCase=" + OBSUnitsPerCase);

        int caseIndex = 0;

        dbgLog.fine("printFormatTable:\n" + printFormatTable);
        variableFormatTypeList = new String[varQnty];

        for (int i = 0; i < varQnty; i++) {
            variableFormatTypeList[i] = SPSSConstants.FORMAT_CATEGORY_TABLE
                    .get(printFormatTable.get(variableNameList.get(i)));
            dbgLog.fine("i=" + i + "th variableFormatTypeList=" + variableFormatTypeList[i]);
            formatCategoryTable.put(variableNameList.get(i), variableFormatTypeList[i]);
        }
        dbgLog.fine("variableFormatType:\n" + Arrays.deepToString(variableFormatTypeList));
        dbgLog.fine("formatCategoryTable:\n" + formatCategoryTable);

        // TODO:
        // Make sure the date formats are actually preserved!
        // (this is something that was collected in the code below and passed
        // to the UNF calculator).
        // -- L.A. 4.0 alpha
        List<String> casewiseRecordForTabFile = new ArrayList<String>();

        // this compression is applied only to non-float data, i.e. integer;
        // 8-byte float datum is kept intact
        boolean hasReachedEOF = false;

        OBSERVATION: while (true) {

            dbgLog.fine("SAV Reader: compressed: ii=" + ii + "-th iteration");

            // Read one block of compression flags; each flag describes one
            // OBS value (up to 8 x 8 bytes of data in total).
            // NOTE(review): the number of bytes actually read is not checked;
            // a short read leaves the remaining flags at 0 ("do nothing").
            byte[] octate = new byte[LENGTH_SAV_OBS_BLOCK];
            stream.read(octate);

            for (int i = 0; i < LENGTH_SAV_OBS_BLOCK; i++) {

                dbgLog.finer("i=" + i + "-th iteration");

                // treat the flag byte as an unsigned value (0-255)
                int byteCode = octate[i] & 0xFF;

                switch (byteCode) {
                case 252:
                    // FC: end of the file
                    dbgLog.fine("SAV Reader: compressed: end of file mark [FC] was found");
                    hasReachedEOF = true;
                    break;
                case 253:
                    // FD: uncompressed data follows after this octate
                    // (long string datum or float datum); read that OBS
                    byte[] uncompressedByte = new byte[LENGTH_SAV_OBS_BLOCK];
                    stream.read(uncompressedByte);
                    int obsType = OBSwiseTypelList.get((ii * OBS + i) % nOBS);

                    if ((obsType > 0) || (obsType == -1)) {
                        // code > 0 or -1: string or its contiguous block;
                        // decode the whole OBS as a string
                        casewiseRecordForTabFile.add(new String(
                                Arrays.copyOfRange(uncompressedByte, 0, LENGTH_SAV_OBS_BLOCK), defaultCharSet));
                    } else if (obsType == -2) {
                        // code -2: string block decoded without its last byte
                        casewiseRecordForTabFile.add(new String(
                                Arrays.copyOfRange(uncompressedByte, 0, LENGTH_SAV_OBS_BLOCK - 1),
                                defaultCharSet));
                    } else if (obsType == 0) {
                        // code 0: numeric, stored as an 8-byte double
                        ByteBuffer bb_double = ByteBuffer.wrap(uncompressedByte, 0, LENGTH_SAV_OBS_BLOCK);
                        if (isLittleEndian) {
                            bb_double.order(ByteOrder.LITTLE_ENDIAN);
                        }

                        Double ddatum = bb_double.getDouble();
                        // add this non-missing-value numeric datum
                        casewiseRecordForTabFile.add(doubleNumberFormatter.format(ddatum));
                        dbgLog.fine("SAV Reader: compressed: added value to dataLine: " + ddatum);

                    } else {
                        dbgLog.fine("SAV Reader: out-of-range exception");
                        throw new IOException("out-of-range value was found");
                    }

                    break;
                case 254:
                    // FE: used as the missing value for string variables;
                    // an empty case in a string variable also takes this value
                    // (string variable does not accept space-only data)
                    // cf: uncompressed case
                    // 20 20 20 20 20 20 20 20

                    casewiseRecordForTabFile.add(" "); // add "." here?

                    // Note that technically this byte flag (254/xFE) means
                    // that *eight* white space characters should be
                    // written to the output stream; these 8 bytes "come out
                    // of nowhere" - they are not written in the data section,
                    // but this flag specifies that they should be added to
                    // the output. We only write out one whitespace character,
                    // not 8 as instructed. This appears to be legit; these
                    // blocks of 8 spaces seem to be only used for padding,
                    // and all such multiple padding spaces are stripped
                    // anyway during the post-processing.

                    break;
                case 255:
                    // FF: system missing value for numeric variables
                    // cf: uncompressed case (sysmis)
                    // FF FF FF FF FF FF eF FF(little endian)
                    dbgLog.fine("SAV Reader: compressed: Missing Value, numeric");
                    casewiseRecordForTabFile.add(MissingValueForTextDataFileNumeric);

                    break;
                case 0:
                    // 00: do nothing
                    dbgLog.fine("SAV Reader: compressed: doing nothing (zero); ");

                    break;
                default:
                    if ((byteCode > 0) && (byteCode < 252)) {
                        // datum is compressed: the value is the code biased by 100
                        Double unCompressed = Double.valueOf(byteCode - 100);
                        dbgLog.fine("SAV Reader: compressed: default case: " + unCompressed);

                        casewiseRecordForTabFile.add(doubleNumberFormatter.format(unCompressed));
                    }
                }// end of switch

                // The-end-of-a-case(row)-processing

                // While both OBS and LENGTH_SAV_OBS_BLOCK = 8
                // (OBS was initialized as OBS=LENGTH_SAV_OBS_BLOCK),
                // the 2 values mean different things:
                // LENGTH_SAV_OBS_BLOCK is the number of bytes in one OBS;
                // and OBS is the number of OBS blocks that we process
                // at a time. I.e., we process 8 chunks of 8 bytes at a time.
                // This is how data is organized inside an SAV file:
                // 8 bytes of compression flags, followed by 8x8 or fewer
                // (depending on the flags) bytes of compressed data.
                //
                // Also, the "varCounter" variable name is misleading --
                // it counts not variables, but OBS blocks within the case.
                int varCounter = (ii * OBS + i + 1) % nOBS;

                dbgLog.fine("SAV Reader: compressed: OBS counter=" + varCounter + "(ii=" + ii + ")");

                if (varCounter == 0) {

                    // all OBS units of the current case have been consumed

                    if (hasStringVarContinuousBlock) {

                        // A long string variable spans several OBS units;
                        // join each run of continuation units (-1 / -2) onto
                        // the unit that precedes it.

                        StringBuilder sb = new StringBuilder("");

                        Set<Integer> removeJset = new HashSet<Integer>();
                        for (int j = 0; j < nOBS; j++) {
                            dbgLog.fine("RTD: j=" + j + "-th type =" + OBSwiseTypelList.get(j));
                            if ((OBSwiseTypelList.get(j) == -1) || (OBSwiseTypelList.get(j) == -2)) {
                                // Continued String variable found at j-th
                                // position; its head is at j-1
                                int lastJ = j;
                                String concatenated = null;

                                removeJset.add(j);
                                sb.append(casewiseRecordForTabFile.get(j - 1));
                                sb.append(casewiseRecordForTabFile.get(j));

                                for (int jc = 1;; jc++) {
                                    if ((j + jc == nOBS) || ((OBSwiseTypelList.get(j + jc) != -1)
                                            && (OBSwiseTypelList.get(j + jc) != -2))) {

                                        // j + jc - 1 was the end unit of this string variable
                                        concatenated = sb.toString();
                                        sb.setLength(0);
                                        lastJ = j + jc;
                                        break;
                                    } else {
                                        sb.append(casewiseRecordForTabFile.get(j + jc));
                                        removeJset.add(j + jc);
                                    }
                                }
                                casewiseRecordForTabFile.set(j - 1, concatenated);

                                // resume scanning at the first unit after the run
                                j = lastJ - 1;

                            } // end-of-if: continuous-OBS only

                        } // end of loop-j

                        // a new list that stores the case with concatenated string data,
                        // i.e. without the continuation units merged above
                        List<String> newDataLine = new ArrayList<String>();

                        for (int jl = 0; jl < casewiseRecordForTabFile.size(); jl++) {
                            if (!removeJset.contains(jl)) {
                                newDataLine.add(casewiseRecordForTabFile.get(jl));
                            }
                        } // end of loop-jl

                        casewiseRecordForTabFile = newDataLine;

                    } // end-if: stringContinuousVar-exist case

                    // caseIndex starts from 1 not 0
                    caseIndex = (ii * OBS + i + 1) / nOBS;

                    for (int k = 0; k < casewiseRecordForTabFile.size(); k++) {

                        dbgLog.fine("k=" + k + "-th variableTypelList=" + variableTypelList.get(k));

                        if (variableTypelList.get(k) > 0) {

                            // String variable: strip the whitespace padding.
                            // Truncating to the advertised length is not
                            // enough (and is redundant), since a string can
                            // still be space-padded inside its advertised
                            // capacity; stripEnd() removes all of it.

                            String paddRemoved = StringUtils.stripEnd(casewiseRecordForTabFile.get(k), null);
                            // TODO: clean this up.  For now, just make sure that strings contain at least one blank space.
                            if (paddRemoved.equals("")) {
                                paddRemoved = " ";
                            }
                            casewiseRecordForTabFile.set(k, escapeCharacterString(paddRemoved));

                        } // end of variable-type check

                        // fetched after the string clean-up above on purpose
                        String datum = casewiseRecordForTabFile.get(k);

                        if (datum != null && !datum.equals(MissingValueForTextDataFileNumeric)) {

                            String variableFormatType = variableFormatTypeList[k];
                            String printFormat = printFormatTable.get(variableNameList.get(k));
                            dbgLog.finer("k=" + k + "th printFormatTable format=" + printFormat);

                            int formatDecimalPointPosition = formatDecimalPointPositionList.get(k);

                            // constant-first equals(): a variable whose format
                            // category could not be resolved is left untouched
                            if ("date".equals(variableFormatType)) {
                                dbgLog.finer("date case");

                                long dateDatum = Long.parseLong(datum) * 1000L - SPSS_DATE_OFFSET;

                                String newDatum = sdf_ymd.format(new Date(dateDatum));
                                dbgLog.finer("k=" + k + ":" + newDatum);
                                /* saving date format */
                                dbgLog.finer("saving dateFormat[k] = " + sdf_ymd.toPattern());
                                casewiseRecordForTabFile.set(k, newDatum);
                                dateFormatList[k] = sdf_ymd.toPattern();
                            } else if ("time".equals(variableFormatType)) {
                                dbgLog.finer("time case:DTIME or DATETIME or TIME");

                                if ("DTIME".equals(printFormat)) {
                                    // We're not even going to try to handle "DTIME"
                                    // values as time/dates in dataverse; this is a weird
                                    // format that nobody uses outside of SPSS.
                                    // (but we do need to remember to treat the resulting values
                                    // as character strings, not numerics!)

                                    if (datum.indexOf(".") < 0) {
                                        long dateDatum = Long.parseLong(datum) * 1000L - SPSS_DATE_BIAS;
                                        String newDatum = sdf_dhms.format(new Date(dateDatum));
                                        dbgLog.finer("k=" + k + ":" + newDatum);
                                        casewiseRecordForTabFile.set(k, newDatum);
                                    } else {
                                        // decimal point included
                                        String[] timeData = datum.split("\\.");

                                        dbgLog.finer(StringUtils.join(timeData, "|"));
                                        long dateDatum = Long.parseLong(timeData[0]) * 1000L - SPSS_DATE_BIAS;
                                        StringBuilder sb_time = new StringBuilder(
                                                sdf_dhms.format(new Date(dateDatum)));
                                        dbgLog.finer(sb_time.toString());

                                        appendFractionalSeconds(sb_time, timeData, formatDecimalPointPosition);

                                        dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                        casewiseRecordForTabFile.set(k, sb_time.toString());
                                    }
                                } else if ("DATETIME".equals(printFormat)) {
                                    // TODO:
                                    // (for both datetime and "dateless" time)
                                    // keep the longest of the matching formats - i.e., if there are *some*
                                    // values in the vector that have thousands of a second, that should be
                                    // part of the saved format!
                                    //  -- L.A. Aug. 12 2014
                                    if (datum.indexOf(".") < 0) {
                                        long dateDatum = Long.parseLong(datum) * 1000L - SPSS_DATE_OFFSET;
                                        String newDatum = sdf_ymdhms.format(new Date(dateDatum));
                                        dbgLog.finer("k=" + k + ":" + newDatum);
                                        casewiseRecordForTabFile.set(k, newDatum);
                                        dateFormatList[k] = sdf_ymdhms.toPattern();
                                    } else {
                                        // decimal point included
                                        String[] timeData = datum.split("\\.");

                                        long dateDatum = Long.parseLong(timeData[0]) * 1000L - SPSS_DATE_OFFSET;
                                        StringBuilder sb_time = new StringBuilder(
                                                sdf_ymdhms.format(new Date(dateDatum)));

                                        appendFractionalSeconds(sb_time, timeData, formatDecimalPointPosition);

                                        dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                        casewiseRecordForTabFile.set(k, sb_time.toString());
                                        dateFormatList[k] = sdf_ymdhms.toPattern()
                                                + (formatDecimalPointPosition > 0 ? ".S" : "");
                                    }
                                } else if ("TIME".equals(printFormat)) {
                                    // TODO:
                                    // double-check that we are handling "dateless" time correctly... -- L.A. Aug. 2014
                                    if (datum.indexOf(".") < 0) {
                                        long dateDatum = Long.parseLong(datum) * 1000L;
                                        String newDatum = sdf_hms.format(new Date(dateDatum));
                                        dbgLog.finer("k=" + k + ":" + newDatum);
                                        casewiseRecordForTabFile.set(k, newDatum);
                                        if (dateFormatList[k] == null) {
                                            dateFormatList[k] = sdf_hms.toPattern();
                                        }
                                    } else {
                                        // decimal point included
                                        String[] timeData = datum.split("\\.");

                                        long dateDatum = Long.parseLong(timeData[0]) * 1000L;
                                        StringBuilder sb_time = new StringBuilder(
                                                sdf_hms.format(new Date(dateDatum)));

                                        appendFractionalSeconds(sb_time, timeData, formatDecimalPointPosition);

                                        dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                        casewiseRecordForTabFile.set(k, sb_time.toString());

                                        // keep the longest format seen for this variable
                                        String format_hmsS = sdf_hms.toPattern()
                                                + (formatDecimalPointPosition > 0 ? ".S" : "");
                                        if (dateFormatList[k] == null
                                                || (format_hmsS.length() > dateFormatList[k].length())) {
                                            dateFormatList[k] = format_hmsS;
                                        }
                                    }
                                }

                            } else if ("other".equals(variableFormatType)) {
                                dbgLog.finer("other non-date/time case:=" + i);

                                if ("WKDAY".equals(printFormat)) {
                                    // day of week: 1-based value -> name
                                    dbgLog.finer("data k=" + k + ":" + datum);
                                    String newDatum = SPSSConstants.WEEKDAY_LIST.get(Integer.parseInt(datum) - 1);
                                    dbgLog.finer("data k=" + k + ":" + newDatum);
                                    casewiseRecordForTabFile.set(k, newDatum);
                                    dbgLog.finer("wkday:k=" + k + ":" + casewiseRecordForTabFile.get(k));
                                } else if ("MONTH".equals(printFormat)) {
                                    // month: 1-based value -> name
                                    dbgLog.finer("data k=" + k + ":" + datum);
                                    String newDatum = SPSSConstants.MONTH_LIST.get(Integer.parseInt(datum) - 1);
                                    dbgLog.finer("data k=" + k + ":" + newDatum);
                                    casewiseRecordForTabFile.set(k, newDatum);
                                    dbgLog.finer("month:k=" + k + ":" + casewiseRecordForTabFile.get(k));
                                }
                            }

                        } // end: date-time-datum check

                    } // end: loop-k(2nd: variable-wise-check)

                    // write to tab file
                    if (casewiseRecordForTabFile.size() > 0) {
                        pwout.println(StringUtils.join(casewiseRecordForTabFile, "\t"));
                    }

                    // numeric contents-check: remember which plain numeric
                    // variables carried a decimal point in at least one case
                    for (int l = 0; l < casewiseRecordForTabFile.size(); l++) {
                        if ("date".equals(variableFormatTypeList[l]) || "time".equals(variableFormatTypeList[l])
                                || "WKDAY".equals(printFormatTable.get(variableNameList.get(l)))
                                || "MONTH".equals(printFormatTable.get(variableNameList.get(l)))) {
                            // TODO:
                            // figure out if any special handling is still needed here in 4.0.
                            // -- L.A. - Aug. 2014

                        } else if (variableTypelList.get(l) <= 0
                                && casewiseRecordForTabFile.get(l).indexOf(".") >= 0) {
                            decimalVariableSet.add(l);
                        }
                    }

                    // reset the case-wise working objects
                    casewiseRecordForTabFile.clear();

                    // stop once the advertised number of cases has been read
                    if (caseQnty > 0 && caseIndex == caseQnty) {
                        hasReachedEOF = true;
                    }

                    if (hasReachedEOF) {
                        break;
                    }

                } // if(The-end-of-a-case(row)-processing)

            } // loop-i (OBS unit)

            if ((hasReachedEOF) || (stream.available() == 0)) {
                // reached the end of this file
                // do exit-processing

                dbgLog.fine("***** reached the end of the file at " + ii + "th iteration *****");

                break OBSERVATION;
            }

            ii++;

        } // while loop

    } finally {
        // always release the output file, even when decoding fails midway
        if (pwout != null) {
            pwout.close();
        }
    }

    dbgLog.fine("<<<<<<");
    dbgLog.fine("formatCategoryTable = " + formatCategoryTable);
    dbgLog.fine(">>>>>>");

    dbgLog.fine("decimalVariableSet=" + decimalVariableSet);

    dbgLog.fine("decodeRecordTypeDataCompressed(): end");
}

/**
 * Appends "." followed by at most {@code decimals} leading characters of the
 * fractional part found in {@code timeData[1]}. Nothing is appended when
 * {@code decimals} is not positive or when there is no fractional part, and
 * a fraction shorter than {@code decimals} is appended as-is instead of
 * failing with an index error.
 *
 * @param target   buffer holding the already formatted date/time text
 * @param timeData the datum split on the decimal point
 * @param decimals number of decimal places declared by the print format
 */
private static void appendFractionalSeconds(StringBuilder target, String[] timeData, int decimals) {
    if (decimals > 0 && timeData.length > 1) {
        String fraction = timeData[1];
        target.append("." + fraction.substring(0, Math.min(decimals, fraction.length())));
    }
}

From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.sav.SAVFileReader.java

/**
 * Decodes the uncompressed case (row) data of a SAV file and writes it out as
 * tab-delimited text.
 *
 * <p>Each case is read as {@code OBSUnitsPerCase} blocks of
 * {@code LENGTH_SAV_OBS_BLOCK} bytes. Numeric blocks are decoded as doubles
 * (honoring {@code isLittleEndian}); string blocks are decoded with
 * {@code defaultCharSet}, and continuation blocks (type {@code -1} in
 * {@code OBSwiseTypelList}) are concatenated onto the string that precedes them.
 * Date, time, weekday and month values are then converted to their textual form.
 *
 * <p>Side effects visible in this method: (re)assigns
 * {@code variableFormatTypeList}, {@code variableTypeFinal}, {@code dataTable2}
 * and {@code dateFormats}; adds to {@code decimalVariableSet}; and stores the
 * case count, decimal-variable set and format-category table on {@code smd}.
 *
 * @param stream the input positioned at the first byte of the case data
 * @throws IllegalArgumentException if {@code stream} is {@code null}
 * @throws IOException if reading fails or the data ends in the middle of a case
 */
void decodeRecordTypeDataUnCompressed(BufferedInputStream stream) throws IOException {
    dbgLog.fine("***** decodeRecordTypeDataUnCompressed(): start *****");

    if (stream == null) {
        throw new IllegalArgumentException("decodeRecordTypeDataUnCompressed: stream == null!");
    }

    Map<String, String> formatCategoryTable = new LinkedHashMap<String, String>();

    // set-up tab file
    PrintWriter pwout = createOutputWriter(stream);

    // counts the variables found to hold non-integer values (debug output only)
    int numberOfDecimalVariables = 0;

    try {
        boolean hasStringVarContinuousBlock = obsNonVariableBlockSet.size() > 0;
        dbgLog.fine("hasStringVarContinuousBlock=" + hasStringVarContinuousBlock);

        int OBS = LENGTH_SAV_OBS_BLOCK;
        int nOBS = OBSUnitsPerCase;
        int caseLength = OBS * nOBS;

        dbgLog.fine("OBSUnitsPerCase=" + OBSUnitsPerCase);

        int caseIndex = 0;

        dbgLog.fine("printFormatTable:\n" + printFormatTable);

        dbgLog.fine("printFormatNameTable:\n" + printFormatNameTable);
        variableFormatTypeList = new String[varQnty];

        for (int i = 0; i < varQnty; i++) {
            variableFormatTypeList[i] = SPSSConstants.FORMAT_CATEGORY_TABLE
                    .get(printFormatTable.get(variableNameList.get(i)));
            dbgLog.fine("i=" + i + "th variableFormatTypeList=" + variableFormatTypeList[i]);
            formatCategoryTable.put(variableNameList.get(i), variableFormatTypeList[i]);
        }
        dbgLog.fine("variableFormatType:\n" + Arrays.deepToString(variableFormatTypeList));
        dbgLog.fine("formatCategoryTable:\n" + formatCategoryTable);

        // contents (variable) checker concerning decimals:
        // 0 = integer so far, 1 = has decimals, -1 = string/date/time/weekday/month
        variableTypeFinal = new int[varQnty];
        Arrays.fill(variableTypeFinal, 0);

        List<String> casewiseRecordForTabFile = new ArrayList<String>();
        String[] caseWiseDateFormatForUNF = null;
        List<String> casewiseRecordForUNF = new ArrayList<String>();

        // missing values are written to the tab-delimited file by
        // using the default or user-specified missing-value strings;
        // however, to calculate UNF/summary statistics,
        // classes for these calculations require their specific
        // missing values that differ from the above missing-value
        // strings; therefore, after row data for the tab-delimited
        // file are written, missing values in a row are changed to
        // UNF/summary-statistics-OK ones.

        // data-storage object for sumStat
        dataTable2 = new Object[varQnty][caseQnty];
        // storage of date formats to pass to UNF
        dateFormats = new String[varQnty][caseQnty];

        for (int i = 0;; i++) { // case-wise loop

            byte[] buffer = new byte[caseLength];

            // A single read() may deliver fewer bytes than requested, so keep
            // reading until the whole case is in the buffer or the data ends.
            int bytesRead = 0;
            while (bytesRead < caseLength) {
                int n = stream.read(buffer, bytesRead, caseLength - bytesRead);
                if (n < 0) {
                    break;
                }
                bytesRead += n;
            }

            if (bytesRead == 0) {
                // nothing (more) to decode: do not fabricate a case out of an empty buffer
                dbgLog.fine("***** reached the end of the file at " + i + "th iteration *****");
                break;
            }
            if (bytesRead < caseLength) {
                throw new IOException("decodeRecordTypeDataUnCompressed: unexpected end of data; case "
                        + (caseIndex + 1) + " has " + bytesRead + " of " + caseLength + " bytes");
            }

            for (int k = 0; k < nOBS; k++) {
                int offset = OBS * k;

                // uncompressed case
                // numeric missing value == sysmis
                // FF FF FF FF FF FF eF FF(little endian)
                // string missing value
                // 20 20 20 20 20 20 20 20
                // cf: compressed case
                // numeric type:sysmis == 0xFF
                // string type: missing value == 0xFE

                boolean isNumeric = OBSwiseTypelList.get(k) == 0;

                if (isNumeric) {
                    dbgLog.finer(k + "-th variable is numeric");
                    // interpret as double
                    ByteBuffer bb_double = ByteBuffer.wrap(buffer, offset, LENGTH_SAV_OBS_BLOCK);
                    if (isLittleEndian) {
                        bb_double.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    String dphex = new String(Hex.encodeHex(
                            Arrays.copyOfRange(bb_double.array(), offset, offset + LENGTH_SAV_OBS_BLOCK)));
                    dbgLog.finer("dphex=" + dphex);

                    if ((dphex.equals("ffffffffffffefff")) || (dphex.equals("ffefffffffffffff"))) {
                        // add the numeric missing value
                        dbgLog.fine("SAV Reader: adding: Missing Value (numeric)");
                        casewiseRecordForTabFile.add(MissingValueForTextDataFileNumeric);
                    } else {
                        Double ddatum = bb_double.getDouble();
                        dbgLog.fine("SAV Reader: adding: ddatum=" + ddatum);

                        // add this non-missing-value numeric datum
                        casewiseRecordForTabFile.add(doubleNumberFormatter.format(ddatum));
                    }

                } else {
                    dbgLog.finer(k + "-th variable is string");
                    // string case
                    // do not trim here: string might have spaces within it;
                    // the missing value (hex) for a string variable is:
                    // "20 20 20 20 20 20 20 20"

                    String strdatum = new String(
                            Arrays.copyOfRange(buffer, offset, (offset + LENGTH_SAV_OBS_BLOCK)),
                            defaultCharSet);
                    dbgLog.finer("str_datum=" + strdatum);
                    // add this non-missing-value string datum
                    casewiseRecordForTabFile.add(strdatum);

                } // if isNumeric

            } // k-loop

            // String-variable's continuous block exists:
            if (hasStringVarContinuousBlock) {
                // continuous blocks: string case
                // concatenating process

                StringBuilder sb = new StringBuilder("");

                // indices of the continuation units that were merged into a preceding unit
                Set<Integer> removeJset = new HashSet<Integer>();
                for (int j = 0; j < nOBS; j++) {
                    dbgLog.finer("j=" + j + "-th type =" + OBSwiseTypelList.get(j));
                    if (OBSwiseTypelList.get(j) == -1) {
                        // String continuation found at j-th;
                        // the string starts at j-1
                        int lastJ = j;
                        String concatanated = null;

                        removeJset.add(j);
                        sb.append(casewiseRecordForTabFile.get(j - 1));
                        sb.append(casewiseRecordForTabFile.get(j));
                        for (int jc = 1;; jc++) {
                            // Stop at the first non-continuation unit, or at the end of
                            // the case when the continued string is the last variable.
                            if (j + jc >= nOBS || OBSwiseTypelList.get(j + jc) != -1) {
                                concatanated = sb.toString();
                                sb.setLength(0);
                                lastJ = j + jc;
                                break;
                            } else {
                                sb.append(casewiseRecordForTabFile.get(j + jc));
                                removeJset.add(j + jc);
                            }
                        }
                        casewiseRecordForTabFile.set(j - 1, concatanated);

                        // resume the scan at the unit that ended the string
                        j = lastJ - 1;

                    } // end-of-if: continuous-OBS only
                } // end of loop-j

                List<String> newDataLine = new ArrayList<String>();

                for (int jl = 0; jl < casewiseRecordForTabFile.size(); jl++) {
                    if (!removeJset.contains(jl)) {
                        newDataLine.add(casewiseRecordForTabFile.get(jl));
                    }
                }

                dbgLog.fine("new casewiseRecordForTabFile=" + newDataLine);
                dbgLog.fine("new casewiseRecordForTabFile(size)=" + newDataLine.size());

                casewiseRecordForTabFile = newDataLine;

            } // end-if: stringContinuousVar-exist case

            casewiseRecordForUNF.addAll(casewiseRecordForTabFile);

            caseWiseDateFormatForUNF = new String[casewiseRecordForTabFile.size()];

            caseIndex++;
            dbgLog.finer("caseIndex=" + caseIndex);
            for (int k = 0; k < casewiseRecordForTabFile.size(); k++) {

                if (variableTypelList.get(k) > 0) {
                    // String variable case: set to  -1
                    variableTypeFinal[k] = -1;

                    // See my comments for this padding removal logic
                    // in the "compressed" method -- L.A.

                    String paddRemoved = StringUtils.stripEnd(casewiseRecordForTabFile.get(k), null);
                    // TODO: clean this up.  For now, just make sure that strings contain at least one blank space.
                    if (paddRemoved.equals("")) {
                        paddRemoved = " ";
                    }

                    casewiseRecordForUNF.set(k, paddRemoved);
                    casewiseRecordForTabFile.set(k,
                            "\"" + paddRemoved.replaceAll("\"", Matcher.quoteReplacement("\\\"")) + "\"");

                    // end of String var case

                } else {
                    // numeric var case
                    if (casewiseRecordForTabFile.get(k).equals(MissingValueForTextDataFileNumeric)) {
                        casewiseRecordForUNF.set(k, null);
                    }

                } // end of variable-type check

                if (casewiseRecordForTabFile.get(k) != null
                        && !casewiseRecordForTabFile.get(k).equals(MissingValueForTextDataFileNumeric)) {

                    // to do date conversion
                    String variableFormatType = variableFormatTypeList[k];
                    dbgLog.finer("k=" + k + "th variable format=" + variableFormatType);

                    int formatDecimalPointPosition = formatDecimalPointPositionList.get(k);

                    if (variableFormatType.equals("date")) {
                        dbgLog.finer("date case");

                        long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k)) * 1000L
                                - SPSS_DATE_OFFSET;

                        String newDatum = sdf_ymd.format(new Date(dateDatum));
                        dbgLog.finer("k=" + k + ":" + newDatum);
                        caseWiseDateFormatForUNF[k] = sdf_ymd.toPattern();

                        casewiseRecordForTabFile.set(k, newDatum);
                        casewiseRecordForUNF.set(k, newDatum);
                    } else if (variableFormatType.equals("time")) {
                        dbgLog.finer("time case:DTIME or DATETIME or TIME");

                        if (printFormatTable.get(variableNameList.get(k)).equals("DTIME")) {

                            if (casewiseRecordForTabFile.get(k).indexOf(".") < 0) {
                                long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k))
                                        * 1000L - SPSS_DATE_BIAS;
                                String newDatum = sdf_dhms.format(new Date(dateDatum));
                                // Note: DTIME is not a complete date, so we don't save a date format with it
                                dbgLog.finer("k=" + k + ":" + newDatum);
                                casewiseRecordForTabFile.set(k, newDatum);
                                casewiseRecordForUNF.set(k, newDatum);
                            } else {
                                // decimal point included
                                String[] timeData = casewiseRecordForTabFile.get(k).split("\\.");

                                dbgLog.finer(StringUtils.join(timeData, "|"));
                                long dateDatum = Long.parseLong(timeData[0]) * 1000L - SPSS_DATE_BIAS;
                                StringBuilder sb_time = new StringBuilder(sdf_dhms.format(new Date(dateDatum)));

                                if (formatDecimalPointPosition > 0) {
                                    // the value may carry fewer fraction digits than the format declares
                                    int nDigits = Math.min(formatDecimalPointPosition, timeData[1].length());
                                    sb_time.append("." + timeData[1].substring(0, nDigits));
                                }

                                dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                casewiseRecordForTabFile.set(k, sb_time.toString());
                                casewiseRecordForUNF.set(k, sb_time.toString());
                            }
                        } else if (printFormatTable.get(variableNameList.get(k)).equals("DATETIME")) {

                            if (casewiseRecordForTabFile.get(k).indexOf(".") < 0) {
                                long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k))
                                        * 1000L - SPSS_DATE_OFFSET;
                                String newDatum = sdf_ymdhms.format(new Date(dateDatum));
                                caseWiseDateFormatForUNF[k] = sdf_ymdhms.toPattern();
                                dbgLog.finer("k=" + k + ":" + newDatum);
                                casewiseRecordForTabFile.set(k, newDatum);
                                casewiseRecordForUNF.set(k, newDatum);
                            } else {
                                // decimal point included
                                String[] timeData = casewiseRecordForTabFile.get(k).split("\\.");

                                long dateDatum = Long.parseLong(timeData[0]) * 1000L - SPSS_DATE_OFFSET;
                                StringBuilder sb_time = new StringBuilder(
                                        sdf_ymdhms.format(new Date(dateDatum)));

                                if (formatDecimalPointPosition > 0) {
                                    // the value may carry fewer fraction digits than the format declares
                                    int nDigits = Math.min(formatDecimalPointPosition, timeData[1].length());
                                    sb_time.append("." + timeData[1].substring(0, nDigits));
                                }
                                caseWiseDateFormatForUNF[k] = sdf_ymdhms.toPattern()
                                        + (formatDecimalPointPosition > 0 ? ".S" : "");
                                dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                casewiseRecordForTabFile.set(k, sb_time.toString());
                                casewiseRecordForUNF.set(k, sb_time.toString());
                            }
                        } else if (printFormatTable.get(variableNameList.get(k)).equals("TIME")) {
                            if (casewiseRecordForTabFile.get(k).indexOf(".") < 0) {
                                long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k))
                                        * 1000L;
                                String newDatum = sdf_hms.format(new Date(dateDatum));
                                caseWiseDateFormatForUNF[k] = sdf_hms.toPattern();
                                dbgLog.finer("k=" + k + ":" + newDatum);
                                casewiseRecordForTabFile.set(k, newDatum);
                                casewiseRecordForUNF.set(k, newDatum);
                            } else {
                                // decimal point included
                                String[] timeData = casewiseRecordForTabFile.get(k).split("\\.");

                                long dateDatum = Long.parseLong(timeData[0]) * 1000L;
                                StringBuilder sb_time = new StringBuilder(sdf_hms.format(new Date(dateDatum)));

                                if (formatDecimalPointPosition > 0) {
                                    // the value may carry fewer fraction digits than the format declares
                                    int nDigits = Math.min(formatDecimalPointPosition, timeData[1].length());
                                    sb_time.append("." + timeData[1].substring(0, nDigits));
                                }
                                caseWiseDateFormatForUNF[k] = this.sdf_hms.toPattern()
                                        + (formatDecimalPointPosition > 0 ? ".S" : "");
                                dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                casewiseRecordForTabFile.set(k, sb_time.toString());
                                casewiseRecordForUNF.set(k, sb_time.toString());
                            }
                        }
                    } else if (variableFormatType.equals("other")) {
                        dbgLog.finer("other non-date/time case");

                        if (printFormatTable.get(variableNameList.get(k)).equals("WKDAY")) {
                            // day of week: the stored value is a 1-based index into WEEKDAY_LIST
                            dbgLog.finer("data k=" + k + ":" + casewiseRecordForTabFile.get(k));
                            String newDatum = SPSSConstants.WEEKDAY_LIST
                                    .get(Integer.valueOf(casewiseRecordForTabFile.get(k)) - 1);
                            dbgLog.finer("data k=" + k + ":" + newDatum);
                            casewiseRecordForTabFile.set(k, newDatum);
                            casewiseRecordForUNF.set(k, newDatum);
                            dbgLog.finer("wkday:k=" + k + ":" + casewiseRecordForTabFile.get(k));
                        } else if (printFormatTable.get(variableNameList.get(k)).equals("MONTH")) {
                            // month: the stored value is a 1-based index into MONTH_LIST
                            dbgLog.finer("data k=" + k + ":" + casewiseRecordForTabFile.get(k));
                            String newDatum = SPSSConstants.MONTH_LIST
                                    .get(Integer.valueOf(casewiseRecordForTabFile.get(k)) - 1);
                            dbgLog.finer("data k=" + k + ":" + newDatum);
                            casewiseRecordForTabFile.set(k, newDatum);
                            casewiseRecordForUNF.set(k, newDatum);
                            dbgLog.finer("month:k=" + k + ":" + casewiseRecordForTabFile.get(k));

                        }
                    }
                    // end of date/time block
                } // end: date-time-datum check

            } // end: loop-k(2nd: variable-wise-check)

            if (casewiseRecordForTabFile.size() > 0) {
                // write to tab file
                pwout.println(StringUtils.join(casewiseRecordForTabFile, "\t"));

                // keep the UNF/summary-statistics copy of this case
                for (int ij = 0; ij < varQnty; ij++) {
                    dataTable2[ij][caseIndex - 1] = casewiseRecordForUNF.get(ij);
                    if (variableFormatTypeList[ij].equals("date")
                            || variableFormatTypeList[ij].equals("time")) {
                        this.dateFormats[ij][caseIndex - 1] = caseWiseDateFormatForUNF[ij];
                    }
                }
            }

            // numeric contents-check
            for (int l = 0; l < casewiseRecordForTabFile.size(); l++) {
                if (variableFormatTypeList[l].equals("date") || variableFormatTypeList[l].equals("time")
                        || printFormatTable.get(variableNameList.get(l)).equals("WKDAY")
                        || printFormatTable.get(variableNameList.get(l)).equals("MONTH")) {
                    variableTypeFinal[l] = -1;
                }

                if (variableTypeFinal[l] == 0) {
                    if (casewiseRecordForTabFile.get(l).indexOf(".") >= 0) {
                        // l-th variable is not integer; reached at most once per
                        // variable because the type is no longer 0 afterwards
                        variableTypeFinal[l] = 1;
                        decimalVariableSet.add(l);
                        numberOfDecimalVariables++;
                    }
                }
            }

            // reset the case-wise working objects
            casewiseRecordForTabFile.clear();
            casewiseRecordForUNF.clear();

            if (stream.available() == 0) {
                // reached the end of this file
                // do exit-processing

                dbgLog.fine("***** reached the end of the file at " + i + "th iteration *****");

                break;
            } // if eof processing
        } //i-loop: case(row) iteration

    } finally {
        // close the writer on every exit path, including failures
        pwout.close();
    }

    smd.getFileInformation().put("caseQnty", caseQnty);
    smd.setDecimalVariables(decimalVariableSet);
    smd.setVariableFormatCategory(formatCategoryTable);

    // contents check
    dbgLog.fine("variableType=" + ArrayUtils.toString(variableTypeFinal));
    dbgLog.fine("numberOfDecimalVariables=" + numberOfDecimalVariables);
    dbgLog.fine("decimalVariableSet=" + decimalVariableSet);

    dbgLog.fine("***** decodeRecordTypeDataUnCompressed(): end *****");
}

From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SAVFileReader.java

void decodeRecordTypeDataUnCompressed(BufferedInputStream stream) throws IOException {
    dbgLog.fine("***** decodeRecordTypeDataUnCompressed(): start *****");

    if (stream == null) {
        throw new IllegalArgumentException("decodeRecordTypeDataUnCompressed: stream == null!");
    }//from www  .ja  va 2s  .  com

    int varQnty = dataTable.getVarQuantity().intValue();

    // 
    // set-up tab file

    PrintWriter pwout = createOutputWriter(stream);

    boolean hasStringVarContinuousBlock = obsNonVariableBlockSet.size() > 0 ? true : false;
    dbgLog.fine("hasStringVarContinuousBlock=" + hasStringVarContinuousBlock);

    int ii = 0;

    int OBS = LENGTH_SAV_OBS_BLOCK;
    int nOBS = OBSUnitsPerCase;

    dbgLog.fine("OBSUnitsPerCase=" + OBSUnitsPerCase);

    int caseIndex = 0;

    dbgLog.fine("printFormatTable:\n" + printFormatTable);

    variableFormatTypeList = new String[varQnty];
    dateFormatList = new String[varQnty];

    for (int i = 0; i < varQnty; i++) {
        variableFormatTypeList[i] = SPSSConstants.FORMAT_CATEGORY_TABLE
                .get(printFormatTable.get(variableNameList.get(i)));
        dbgLog.fine("i=" + i + "th variableFormatTypeList=" + variableFormatTypeList[i]);
        formatCategoryTable.put(variableNameList.get(i), variableFormatTypeList[i]);
    }
    dbgLog.fine("variableFormatType:\n" + Arrays.deepToString(variableFormatTypeList));
    dbgLog.fine("formatCategoryTable:\n" + formatCategoryTable);

    int numberOfDecimalVariables = 0;

    // TODO: 
    // Make sure the date formats are actually preserved! 
    // (this is something that was collected in the code below and passed
    // to the UNF calculator). 
    // -- L.A. 4.0 alpha

    List<String> casewiseRecordForTabFile = new ArrayList<String>();

    // missing values are written to the tab-delimited file by
    // using the default or user-specified missing-value  strings;
    // however, to calculate UNF/summary statistics,
    // classes for these calculations require their specific 
    // missing values that differ from the above missing-value
    // strings; therefore, after row data for the tab-delimited 
    // file are written, missing values in a row are changed to
    // UNF/summary-statistics-OK ones.

    // data-storage object for sumStat
    ///dataTable2 = new Object[varQnty][caseQnty];
    // storage of date formats to pass to UNF   
    ///dateFormats = new String[varQnty][caseQnty];

    try {
        for (int i = 0;; i++) { // case-wise loop

            byte[] buffer = new byte[OBS * nOBS];

            int nbytesuc = stream.read(buffer);

            StringBuilder sb_stringStorage = new StringBuilder("");

            for (int k = 0; k < nOBS; k++) {
                int offset = OBS * k;

                // uncompressed case
                // numeric missing value == sysmis
                // FF FF FF FF FF FF eF FF(little endian)
                // string missing value
                // 20 20 20 20 20 20 20 20
                // cf: compressed case 
                // numeric type:sysmis == 0xFF
                // string type: missing value == 0xFE
                // 

                boolean isNumeric = OBSwiseTypelList.get(k) == 0 ? true : false;

                if (isNumeric) {
                    dbgLog.finer(k + "-th variable is numeric");
                    // interprete as double
                    ByteBuffer bb_double = ByteBuffer.wrap(buffer, offset, LENGTH_SAV_OBS_BLOCK);
                    if (isLittleEndian) {
                        bb_double.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    //char[] hexpattern =
                    String dphex = new String(Hex.encodeHex(
                            Arrays.copyOfRange(bb_double.array(), offset, offset + LENGTH_SAV_OBS_BLOCK)));
                    dbgLog.finer("dphex=" + dphex);

                    if ((dphex.equals("ffffffffffffefff")) || (dphex.equals("ffefffffffffffff"))) {
                        //casewiseRecordForTabFile.add(systemMissingValue);
                        // add the numeric missing value
                        dbgLog.fine("SAV Reader: adding: Missing Value (numeric)");
                        casewiseRecordForTabFile.add(MissingValueForTextDataFileNumeric);
                    } else {
                        Double ddatum = bb_double.getDouble();
                        dbgLog.fine("SAV Reader: adding: ddatum=" + ddatum);

                        // add this non-missing-value numeric datum
                        casewiseRecordForTabFile.add(doubleNumberFormatter.format(ddatum));
                    }

                } else {
                    dbgLog.finer(k + "-th variable is string");
                    // string case
                    // strip space-padding
                    // do not trim: string might have spaces within it
                    // the missing value (hex) for a string variable is:
                    // "20 20 20 20 20 20 20 20"

                    String strdatum = new String(
                            Arrays.copyOfRange(buffer, offset, (offset + LENGTH_SAV_OBS_BLOCK)),
                            defaultCharSet);
                    dbgLog.finer("str_datum=" + strdatum);
                    // add this non-missing-value string datum 
                    casewiseRecordForTabFile.add(strdatum);

                } // if isNumeric

            } // k-loop

            // String-variable's continuous block exits:
            if (hasStringVarContinuousBlock) {
                // continuous blocks: string case
                // concatenating process
                //dbgLog.fine("concatenating process starts");

                //dbgLog.fine("casewiseRecordForTabFile(before)="+casewiseRecordForTabFile);
                //dbgLog.fine("casewiseRecordForTabFile(before:size)="+casewiseRecordForTabFile.size());

                StringBuilder sb = new StringBuilder("");
                int firstPosition = 0;

                Set<Integer> removeJset = new HashSet<Integer>();
                for (int j = 0; j < nOBS; j++) {
                    dbgLog.finer("j=" + j + "-th type =" + OBSwiseTypelList.get(j));
                    if (OBSwiseTypelList.get(j) == -1) {
                        // String continued fount at j-th 
                        // look back the j-1 
                        firstPosition = j - 1;
                        int lastJ = j;
                        String concatanated = null;

                        removeJset.add(j);
                        sb.append(casewiseRecordForTabFile.get(j - 1));
                        sb.append(casewiseRecordForTabFile.get(j));
                        for (int jc = 1;; jc++) {
                            if (OBSwiseTypelList.get(j + jc) != -1) {
                                // j is the end unit of this string variable
                                concatanated = sb.toString();
                                sb.setLength(0);
                                lastJ = j + jc;
                                break;
                            } else {
                                sb.append(casewiseRecordForTabFile.get(j + jc));
                                removeJset.add(j + jc);
                            }
                        }
                        casewiseRecordForTabFile.set(j - 1, concatanated);

                        //out.println(j-1+"th concatanated="+concatanated);
                        j = lastJ - 1;

                    } // end-of-if: continuous-OBS only
                } // end of loop-j

                List<String> newDataLine = new ArrayList<String>();

                for (int jl = 0; jl < casewiseRecordForTabFile.size(); jl++) {
                    //out.println("jl="+jl+"-th datum =["+casewiseRecordForTabFile.get(jl)+"]");

                    if (!removeJset.contains(jl)) {
                        newDataLine.add(casewiseRecordForTabFile.get(jl));
                    }
                }

                dbgLog.fine("new casewiseRecordForTabFile=" + newDataLine);
                dbgLog.fine("new casewiseRecordForTabFile(size)=" + newDataLine.size());

                casewiseRecordForTabFile = newDataLine;

            } // end-if: stringContinuousVar-exist case

            caseIndex++;
            dbgLog.finer("caseIndex=" + caseIndex);
            for (int k = 0; k < casewiseRecordForTabFile.size(); k++) {

                if (variableTypelList.get(k) > 0) {

                    // See my comments for this padding removal logic
                    // in the "compressed" method -- L.A.

                    String paddRemoved = StringUtils.stripEnd(casewiseRecordForTabFile.get(k).toString(), null);
                    // TODO: clean this up.  For now, just make sure that strings contain at least one blank space.
                    if (paddRemoved.equals("")) {
                        paddRemoved = " ";
                    }

                    //casewiseRecordForTabFile.set(k, "\"" + paddRemoved.replaceAll("\"", Matcher.quoteReplacement("\\\"")) + "\"");
                    casewiseRecordForTabFile.set(k, escapeCharacterString(paddRemoved));

                    // end of String var case

                } // end of variable-type check

                if (casewiseRecordForTabFile.get(k) != null
                        && !casewiseRecordForTabFile.get(k).equals(MissingValueForTextDataFileNumeric)) {

                    // to do date conversion
                    String variableFormatType = variableFormatTypeList[k];
                    dbgLog.finer("k=" + k + "th variable format=" + variableFormatType);

                    int formatDecimalPointPosition = formatDecimalPointPositionList.get(k);

                    if (variableFormatType.equals("date")) {
                        dbgLog.finer("date case");

                        long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString()) * 1000L
                                - SPSS_DATE_OFFSET;

                        String newDatum = sdf_ymd.format(new Date(dateDatum));
                        dbgLog.finer("k=" + k + ":" + newDatum);

                        casewiseRecordForTabFile.set(k, newDatum);
                        dateFormatList[k] = sdf_ymd.toPattern();
                    } else if (variableFormatType.equals("time")) {
                        dbgLog.finer("time case:DTIME or DATETIME or TIME");
                        //formatCategoryTable.put(variableNameList.get(k), "time");
                        // not treating DTIME as date/time; see comment elsewhere in 
                        // the code; 
                        // (but we do need to remember to treat the resulting values 
                        // as character strings, not numerics!)

                        if (printFormatTable.get(variableNameList.get(k)).equals("DTIME")) {

                            if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString())
                                        * 1000L - SPSS_DATE_BIAS;
                                String newDatum = sdf_dhms.format(new Date(dateDatum));
                                // Note: DTIME is not a complete date, so we don't save a date format with it
                                dbgLog.finer("k=" + k + ":" + newDatum);
                                casewiseRecordForTabFile.set(k, newDatum);
                            } else {
                                // decimal point included
                                String[] timeData = casewiseRecordForTabFile.get(k).toString().split("\\.");

                                dbgLog.finer(StringUtils.join(timeData, "|"));
                                long dateDatum = Long.parseLong(timeData[0]) * 1000L - SPSS_DATE_BIAS;
                                StringBuilder sb_time = new StringBuilder(sdf_dhms.format(new Date(dateDatum)));

                                if (formatDecimalPointPosition > 0) {
                                    sb_time.append("." + timeData[1].substring(0, formatDecimalPointPosition));
                                }

                                dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                casewiseRecordForTabFile.set(k, sb_time.toString());
                            }
                        } else if (printFormatTable.get(variableNameList.get(k)).equals("DATETIME")) {
                            // TODO: 
                            // (for both datetime and "dateless" time)
                            // keep the longest of the matching formats - i.e., if there are *some*
                            // values in the vector that have thousands of a second, that should be 
                            // part of the saved format!
                            //  -- L.A. Aug. 12 2014 

                            if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString())
                                        * 1000L - SPSS_DATE_OFFSET;
                                String newDatum = sdf_ymdhms.format(new Date(dateDatum));
                                dbgLog.finer("k=" + k + ":" + newDatum);
                                casewiseRecordForTabFile.set(k, newDatum);
                                dateFormatList[k] = sdf_ymdhms.toPattern();
                            } else {
                                // decimal point included
                                String[] timeData = casewiseRecordForTabFile.get(k).toString().split("\\.");

                                //dbgLog.finer(StringUtils.join(timeData, "|"));
                                long dateDatum = Long.parseLong(timeData[0]) * 1000L - SPSS_DATE_OFFSET;
                                StringBuilder sb_time = new StringBuilder(
                                        sdf_ymdhms.format(new Date(dateDatum)));
                                //dbgLog.finer(sb_time.toString());

                                if (formatDecimalPointPosition > 0) {
                                    sb_time.append("." + timeData[1].substring(0, formatDecimalPointPosition));
                                }
                                dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                casewiseRecordForTabFile.set(k, sb_time.toString());
                                // datetime with milliseconds:
                                dateFormatList[k] = sdf_ymdhms.toPattern()
                                        + (formatDecimalPointPosition > 0 ? ".S" : "");
                            }
                        } else if (printFormatTable.get(variableNameList.get(k)).equals("TIME")) {
                            if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString())
                                        * 1000L;
                                String newDatum = sdf_hms.format(new Date(dateDatum));
                                dbgLog.finer("k=" + k + ":" + newDatum);
                                casewiseRecordForTabFile.set(k, newDatum);
                                if (dateFormatList[k] == null) {
                                    dateFormatList[k] = sdf_hms.toPattern();
                                }
                            } else {
                                // decimal point included
                                String[] timeData = casewiseRecordForTabFile.get(k).toString().split("\\.");

                                //dbgLog.finer(StringUtils.join(timeData, "|"));
                                long dateDatum = Long.parseLong(timeData[0]) * 1000L;
                                StringBuilder sb_time = new StringBuilder(sdf_hms.format(new Date(dateDatum)));
                                //dbgLog.finer(sb_time.toString());

                                if (formatDecimalPointPosition > 0) {
                                    sb_time.append("." + timeData[1].substring(0, formatDecimalPointPosition));
                                }
                                dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                casewiseRecordForTabFile.set(k, sb_time.toString());
                                // time, possibly with milliseconds:
                                String format_hmsS = sdf_hms.toPattern()
                                        + (formatDecimalPointPosition > 0 ? ".S" : "");
                                if (dateFormatList[k] == null
                                        || (format_hmsS.length() > dateFormatList[k].length())) {
                                    dateFormatList[k] = format_hmsS;
                                }
                            }
                        }
                    } else if (variableFormatType.equals("other")) {
                        dbgLog.finer("other non-date/time case");

                        if (printFormatTable.get(variableNameList.get(k)).equals("WKDAY")) {
                            // day of week
                            dbgLog.finer("data k=" + k + ":" + casewiseRecordForTabFile.get(k));
                            dbgLog.finer("data k=" + k + ":" + SPSSConstants.WEEKDAY_LIST
                                    .get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1));
                            String newDatum = SPSSConstants.WEEKDAY_LIST
                                    .get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1);
                            casewiseRecordForTabFile.set(k, newDatum);
                            dbgLog.finer("wkday:k=" + k + ":" + casewiseRecordForTabFile.get(k));
                        } else if (printFormatTable.get(variableNameList.get(k)).equals("MONTH")) {
                            // month
                            dbgLog.finer("data k=" + k + ":" + casewiseRecordForTabFile.get(k));
                            dbgLog.finer("data k=" + k + ":" + SPSSConstants.MONTH_LIST
                                    .get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1));
                            String newDatum = SPSSConstants.MONTH_LIST
                                    .get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1);
                            casewiseRecordForTabFile.set(k, newDatum);
                            dbgLog.finer("month:k=" + k + ":" + casewiseRecordForTabFile.get(k));

                        }
                    }
                    // end of date/time block
                } // end: date-time-datum check

            } // end: loop-k(2nd: variablte-wise-check)

            // write to tab file
            if (casewiseRecordForTabFile.size() > 0) {
                pwout.println(StringUtils.join(casewiseRecordForTabFile, "\t"));
            }

            // numeric contents-check
            for (int l = 0; l < casewiseRecordForTabFile.size(); l++) {
                if (variableFormatTypeList[l].equals("date") || variableFormatTypeList[l].equals("time")
                        || printFormatTable.get(variableNameList.get(l)).equals("WKDAY")
                        || printFormatTable.get(variableNameList.get(l)).equals("MONTH")) {

                } else {
                    if (variableTypelList.get(l) <= 0) {
                        if (casewiseRecordForTabFile.get(l).toString().indexOf(".") >= 0) {
                            decimalVariableSet.add(l);
                        }
                    }
                }
            }

            // reset the case-wise working objects
            casewiseRecordForTabFile.clear();

            if (stream.available() == 0) {
                // reached the end of this file
                // do exit-processing

                dbgLog.fine("reached the end of the file at " + ii + "th iteration");

                break;
            } // if eof processing
        } //i-loop: case(row) iteration

        // close the writer
        pwout.close();

    } catch (IOException ex) {
        throw ex;
    }

    // contents check
    dbgLog.fine("numberOfDecimalVariables=" + numberOfDecimalVariables);
    dbgLog.fine("decimalVariableSet=" + decimalVariableSet);

    dbgLog.fine("***** decodeRecordTypeDataUnCompressed(): end *****");
}

From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.sav.SAVFileReader.java

/**
 * Calculates the UNF signature of one variable's data vector and, as a side
 * effect, stores that variable's summary statistics (and, for integer,
 * categorical-double and string variables, its category statistics) in
 * {@code smd}.
 *
 * <p>Numeric values arrive as strings (they were formatted earlier with
 * DecimalFormat), so for the numeric types they are parsed back into
 * {@code Long}/{@code Double} before the UNF and statistics are computed.
 * {@code null} entries are kept as {@code null} in the parsed vectors.
 *
 * @param varData          the variable's values; every non-null entry is cast to {@code String}
 * @param dateFormats      date format patterns handed to {@code UNF5Util.calculateUNF}
 *                         together with the data; used for string variables only
 * @param variableType     {@code 0} for integer, {@code 1} for double, {@code -1} for string
 * @param unfVersionNumber only written to the debug log by this method
 * @param variablePosition index of the variable; used to look up its name in
 *                         {@code variableNameList} and as the key of the summary statistics table
 * @return the UNF value calculated for {@code varData}
 * @throws NumberFormatException    if a non-null entry of a numeric variable cannot be parsed
 * @throws IllegalArgumentException if {@code variableType} is not one of 0, 1, -1
 * @throws UnfException             declared by the signature; presumably raised by the UNF calculation
 * @throws IOException              declared by the signature; presumably raised by the UNF calculation
 * @throws NoSuchAlgorithmException declared by the signature; presumably raised by the UNF calculation
 */
private String getUNF(Object[] varData, String[] dateFormats, int variableType, String unfVersionNumber,
        int variablePosition)
        throws NumberFormatException, UnfException, IOException, NoSuchAlgorithmException {
    String unfValue = null;

    dbgLog.fine("variableType=" + variableType);
    dbgLog.finer("unfVersionNumber=" + unfVersionNumber);
    dbgLog.fine("variablePosition=" + variablePosition);
    dbgLog.fine("variableName=" + variableNameList.get(variablePosition));
    dbgLog.fine("varData:\n" + Arrays.deepToString(varData));

    switch (variableType) {
    case 0:
        // Integer case
        // note: because the DecimalFormat class is used to remove an
        // unnecessary decimal point and 0-padding, numeric data are
        // String objects at this point

        dbgLog.fine("Integer case");

        // Convert array of Strings to array of Longs; nulls (missing values) stay null
        Long[] ldata = new Long[varData.length];
        for (int i = 0; i < varData.length; i++) {
            if (varData[i] != null) {
                ldata[i] = Long.valueOf((String) varData[i]);
            }
        }
        unfValue = UNF5Util.calculateUNF(ldata);
        dbgLog.finer("integer:unfValue=" + unfValue);

        // Logged once; the statistics are a pure function of ldata, so a
        // second identical log line would only repeat the computation.
        dbgLog.info("sumstat:long case="
                + Arrays.deepToString(ArrayUtils.toObject(StatHelper.calculateSummaryStatistics(ldata))));

        smd.getSummaryStatisticsTable().put(variablePosition,
                ArrayUtils.toObject(StatHelper.calculateSummaryStatistics(ldata)));

        Map<String, Integer> catStat = StatHelper.calculateCategoryStatistics(ldata);
        smd.getCategoryStatisticsTable().put(variableNameList.get(variablePosition), catStat);

        break;

    case 1:
        // type "double":
        // The actual Double values have been converted to strings with
        // DecimalFormat; so we'll need to convert them back to a
        // vector of Doubles for calculating the UNFs and the statistics:

        dbgLog.finer("double case");

        Double[] ddata = new Double[varData.length];
        for (int i = 0; i < varData.length; i++) {
            if (varData[i] != null) {
                ddata[i] = Double.valueOf((String) varData[i]);
            }
        }
        unfValue = UNF5Util.calculateUNF(ddata);
        dbgLog.finer("double:unfValue=" + unfValue);

        // Summary stats.

        // IMPORTANT: up until version 3.6 we used to automatically
        // assume that values of type Double were necessarily continuous,
        // and calculate Distribution Sample statistics for them.
        // However, it is entirely possible to have categorical data
        // with Double values (use case reported by Odum; support ticket
        // RT #160712, redmine #3175). So, depending on which one it is,
        // we are now calling either ContDistSample or SummaryStatistics/CategoryStatistics
        // from StatHelper.

        // A double variable is treated as categorical when a value-label
        // set is registered for it.
        boolean isCategoricalVariable = smd.getValueLabelTable()
                .containsKey(smd.getValueLabelMappingTable().get(variableNameList.get(variablePosition)));

        if (isCategoricalVariable) {
            // We calculate summary statistics on the values, the same
            // way we calculate it for integers:

            smd.getSummaryStatisticsTable().put(variablePosition,
                    ArrayUtils.toObject(StatHelper.calculateSummaryStatistics(ddata)));

            // However, in order to calculate category statistics, we'll
            // use the values formatted as strings with DecimalFormat.
            // This is important - because that's how the defined category
            // values have been formatted. So we don't want a Double 1.0
            // in the data vector to be counted as different from the
            // category value defined as "1"!

            String[] strdata = Arrays.asList(varData).toArray(new String[varData.length]);

            Map<String, Integer> doubleCatStat = StatHelper.calculateCategoryStatistics(strdata);
            smd.getCategoryStatisticsTable().put(variableNameList.get(variablePosition), doubleCatStat);

            // TODO: add .info logging.
        } else {
            smd.getSummaryStatisticsTable().put(variablePosition,
                    ArrayUtils.toObject(StatHelper.calculateSummaryStatisticsContDistSample(ddata)));

            dbgLog.info("sumstat:double case=" + Arrays.deepToString(
                    ArrayUtils.toObject(StatHelper.calculateSummaryStatisticsContDistSample(ddata))));
        }

        break;
    case -1:
        // String case

        dbgLog.finer("string case");

        String[] strdata = Arrays.asList(varData).toArray(new String[varData.length]);
        dbgLog.finer("string array passed to calculateUNF: " + Arrays.deepToString(strdata));
        unfValue = UNF5Util.calculateUNF(strdata, dateFormats);
        dbgLog.finer("string:unfValue=" + unfValue);

        smd.getSummaryStatisticsTable().put(variablePosition, StatHelper.calculateSummaryStatistics(strdata));

        Map<String, Integer> strCatStat = StatHelper.calculateCategoryStatistics(strdata);

        smd.getCategoryStatisticsTable().put(variableNameList.get(variablePosition), strCatStat);

        break;
    default:
        dbgLog.fine("unknown variable type found");
        String errorMessage = "unknow variable Type found at varData section";
        throw new IllegalArgumentException(errorMessage);

    } // switch

    dbgLog.fine("unfvalue(last)=" + unfValue);

    dbgLog.info("[SAV] UNF = " + unfValue);

    return unfValue;
}

From source file:com.zimbra.cs.account.ldap.LdapProvisioning.java

/**
 * Adds the given member addresses to a dynamic group.
 *
 * <p>Each address is lower-cased and converted with
 * {@code IDNUtil.toAsciiEmail} before lookup. Addresses that resolve to an
 * account get the group id added to their {@code zimbraMemberOf} attribute
 * (accounts that already carry it are silently skipped). Addresses that do
 * not resolve to an account are treated as external and are added to the
 * group's static unit, unless they are already listed there.
 *
 * @param group   the dynamic group to extend
 * @param members the addresses to add
 * @throws ServiceException {@code INVALID_REQUEST} if the group's membership
 *         is defined by a custom member URL, or if {@code addressExists}
 *         reports a match for the non-account addresses; also whatever the
 *         lookup and LDAP modification calls propagate
 */
private void addDynamicGroupMembers(LdapDynamicGroup group, String[] members) throws ServiceException {
    if (group.isMembershipDefinedByCustomURL()) {
        throw ServiceException.INVALID_REQUEST("cannot add members to dynamic group with custom memberURL",
                null);
    }

    final String targetGroupId = group.getId();

    // Sort the requested members into accounts that still need the
    // membership attribute, and addresses that are not accounts at all.
    List<Account> internalMembers = new ArrayList<Account>();
    List<String> externalMembers = new ArrayList<String>();

    for (int i = 0; i < members.length; i++) {
        String normalized = IDNUtil.toAsciiEmail(members[i].toLowerCase());

        Account account = get(AccountBy.name, normalized);
        if (account == null) {
            // Not an account (could still be a group or group unit address
            // on the system); verified further down via addressExists.
            externalMembers.add(normalized);
            continue;
        }

        Set<String> groupsOfAccount = account.getMultiAttrSet(Provisioning.A_zimbraMemberOf);
        if (!groupsOfAccount.contains(targetGroupId)) {
            internalMembers.add(account);
        }
        // otherwise the account is already in the group: skip it, do not throw
    }

    ZLdapContext ldapContext = null;
    try {
        ldapContext = LdapClient.getContext(LdapServerType.MASTER, LdapUsage.ADD_GROUP_MEMBER);

        // None of the non-account addresses may be an email address on the system.
        if (!externalMembers.isEmpty()
                && addressExists(ldapContext, externalMembers.toArray(new String[externalMembers.size()]))) {
            throw ServiceException.INVALID_REQUEST(
                    "address cannot be a group: " + Arrays.deepToString(externalMembers.toArray()), null);
        }

        // Internal members: add the group id to each account's membership attribute.
        for (Account account : internalMembers) {
            Map<String, Object> membershipChange = new HashMap<String, Object>();
            membershipChange.put("+" + Provisioning.A_zimbraMemberOf, targetGroupId);
            modifyLdapAttrs(account, ldapContext, membershipChange);
            clearUpwardMembershipCache(account);
        }

        // External members: append the ones not yet listed to the static unit.
        LdapDynamicGroup.StaticUnit staticUnit = group.getStaticUnit();
        Set<String> alreadyListed = staticUnit.getMembersSet();
        List<String> newAddresses = Lists.newArrayList();
        for (String address : externalMembers) {
            if (alreadyListed.contains(address)) {
                continue;
            }
            newAddresses.add(address);
        }

        if (!newAddresses.isEmpty()) {
            Map<String, String[]> staticUnitChange = new HashMap<String, String[]>();
            staticUnitChange.put("+" + LdapDynamicGroup.StaticUnit.MEMBER_ATTR,
                    newAddresses.toArray(new String[newAddresses.size()]));
            modifyLdapAttrs(staticUnit, ldapContext, staticUnitChange);
        }
    } finally {
        LdapClient.closeContext(ldapContext);
    }

    PermissionCache.invalidateCache();
    cleanGroupMembersCache(group);
}