Example usage for java.io.BufferedInputStream.available()

Introduction

On this page you can find example usages of the java.io.BufferedInputStream.available() method.

Prototype

public synchronized int available() throws IOException

Source Link

Document

Returns an estimate of the number of bytes that can be read (or skipped over) from this input stream without blocking by the next invocation of a method for this input stream.

Usage

From source file:com.wabacus.WabacusFacade.java

/**
 * Streams a server-side file to the HTTP response as an attachment.
 *
 * <p>Request parameters read:
 * <ul>
 *   <li>{@code serverfilename} - required; rejected when it contains {@code /} or {@code \}.</li>
 *   <li>{@code serverfilepath} - required; resolved through {@code Config.getResourceString}
 *       when {@code Tools.isDefineKey("$", ...)} matches, otherwise through
 *       {@code WabacusUtils.decodeFilePath}.</li>
 *   <li>{@code newfilename} - optional download name; defaults to {@code serverfilename}.</li>
 * </ul>
 *
 * <p>When the resolved directory is a {@code classpath} definition the file is loaded through
 * {@code ConfigLoadManager.currentDynClassLoader}; otherwise it is read from the file system.
 * Validation failures are reported by writing a short message into the response body instead
 * of the file content.
 *
 * @param request  request carrying the parameters described above
 * @param response response that receives the headers and the file bytes
 * @throws WabacusRuntimeException if reading the file or writing the response fails
 */
public static void downloadFile(HttpServletRequest request, HttpServletResponse response) {
    response.setContentType("application/x-msdownload;");
    BufferedInputStream bis = null;
    BufferedOutputStream bos = null;
    // Resolved location of the download; only used to give error/log messages some context.
    String realfilepath = null;
    try {
        bos = new BufferedOutputStream(response.getOutputStream());
        String serverfilename = request.getParameter("serverfilename");
        String serverfilepath = request.getParameter("serverfilepath");
        String newfilename = request.getParameter("newfilename");
        if (serverfilename == null || serverfilename.trim().equals("")) {
            bos.write("????".getBytes());
            return;
        }
        // A file name must not carry any directory component.
        if (serverfilename.indexOf("/") >= 0 || serverfilename.indexOf("\\") >= 0) {
            bos.write("?????".getBytes());
            return;
        }
        if (serverfilepath == null || serverfilepath.trim().equals("")) {
            bos.write("??".getBytes());
            return;
        }
        if (newfilename == null || newfilename.trim().equals("")) {
            newfilename = serverfilename;
        }
        newfilename = WabacusAssistant.getInstance().encodeAttachFilename(request, newfilename);
        response.setHeader("Content-disposition", "attachment;filename=" + newfilename);
        //response.setHeader("Content-disposition","inline;filename="+newfilename);
        String realserverfilepath = null;
        if (Tools.isDefineKey("$", serverfilepath)) {
            realserverfilepath = Config.getInstance().getResourceString(null, null, serverfilepath, true);
        } else {
            realserverfilepath = WabacusUtils.decodeFilePath(serverfilepath);
        }
        if (realserverfilepath == null || realserverfilepath.trim().equals("")) {
            bos.write(("?" + serverfilepath + "??").getBytes());
            // Stop here: without a resolved directory there is nothing to download.
            return;
        }
        realserverfilepath = WabacusAssistant.getInstance().parseConfigPathToRealPath(realserverfilepath,
                Config.webroot_abspath);
        if (Tools.isDefineKey("classpath", realserverfilepath)) {
            realserverfilepath = Tools.getRealKeyByDefine("classpath", realserverfilepath);
            realserverfilepath = Tools.replaceAll(realserverfilepath + "/" + serverfilename, "//", "/").trim();
            // ClassLoader resource names must not start with '/'.
            while (realserverfilepath.startsWith("/")) {
                realserverfilepath = realserverfilepath.substring(1);
            }
            realfilepath = realserverfilepath;
            java.io.InputStream resourceStream = ConfigLoadManager.currentDynClassLoader
                    .getResourceAsStream(realserverfilepath);
            if (resourceStream == null) {
                // Resource not found on the class path: report it like a missing file below.
                bos.write(("?" + serverfilename).getBytes());
                return;
            }
            bis = new BufferedInputStream(resourceStream);
            // Content-Length is deliberately not set: available() is only an estimate of the
            // bytes readable without blocking, and an understated length truncates the download.
        } else {
            File downloadFileObj = new File(FilePathAssistant.getInstance()
                    .standardFilePath(realserverfilepath + File.separator + serverfilename));
            realfilepath = downloadFileObj.getPath();
            if (!downloadFileObj.exists() || downloadFileObj.isDirectory()) {
                bos.write(("?" + serverfilename).getBytes());
                return;
            }
            //response.setHeader("Content-Length", String.valueOf(downloadFileObj.length()));
            bis = new BufferedInputStream(new FileInputStream(downloadFileObj));
        }
        // Copy the file to the response in 1 KB chunks.
        byte[] buff = new byte[1024];
        int bytesRead;
        while ((bytesRead = bis.read(buff, 0, buff.length)) != -1) {
            bos.write(buff, 0, bytesRead);
        }
    } catch (IOException e) {
        throw new WabacusRuntimeException("" + realfilepath + "", e);
    } finally {
        // Close both streams independently so a failure on one does not leak the other.
        try {
            if (bis != null) {
                bis.close();
            }
        } catch (IOException e) {
            log.warn("" + realfilepath + "?", e);
        }
        try {
            if (bos != null) {
                bos.close();
            }
        } catch (IOException e) {
            log.warn("" + realfilepath + "?", e);
        }
    }
}

From source file:org.apache.carbondata.sdk.file.ImageTest.java

/**
 * Writes every image returned by {@code listFiles(sourceImageFolder, sufImage)} into a carbon
 * store together with its height, width, channel depth and annotation text, then reads up to
 * 20 rows back and saves their binary column as files.
 *
 * <p>Depth is 1 for gray images, 4 when the colour model has alpha, otherwise 3. The
 * annotation file name is the part of the image's canonical path before the first match of
 * {@code sufImage} (used as a regular expression by {@code String.split}) followed by
 * {@code sufAnnotation}.
 *
 * @param sourceImageFolder folder scanned for images
 * @param outputPath        carbon output path, also used for reading back
 * @param preDestPath       prefix of the files written while reading back
 * @param sufAnnotation     suffix of the annotation file that accompanies each image
 * @param sufImage          image file suffix
 * @param numToWrite        a new writer is started every {@code numToWrite} images; must not
 *                          be 0 (it is used as a modulus)
 * @throws Exception if an image cannot be decoded or any read/write step fails
 */
public void binaryToCarbonWithHWD(String sourceImageFolder, String outputPath, String preDestPath,
        String sufAnnotation, final String sufImage, int numToWrite) throws Exception {
    int num = 1;
    Field[] fields = new Field[7];
    fields[0] = new Field("height", DataTypes.INT);
    fields[1] = new Field("width", DataTypes.INT);
    fields[2] = new Field("depth", DataTypes.INT);
    fields[3] = new Field("binaryName", DataTypes.STRING);
    fields[4] = new Field("binary", DataTypes.BINARY);
    fields[5] = new Field("labelName", DataTypes.STRING);
    fields[6] = new Field("labelContent", DataTypes.STRING);

    byte[] originBinary = null;

    // read and write image data
    for (int j = 0; j < num; j++) {

        Object[] files = listFiles(sourceImageFolder, sufImage).toArray();

        int index = 0;

        if (null != files) {
            CarbonWriter writer = CarbonWriter.builder().outputPath(outputPath).withCsvInput(new Schema(fields))
                    .withBlockSize(256).writtenBy("SDKS3Example").withPageSizeInMb(1).build();

            for (int i = 0; i < files.length; i++) {
                // Roll over to a fresh writer every numToWrite images (including the first one).
                if (0 == index % numToWrite) {
                    writer.close();
                    writer = CarbonWriter.builder().outputPath(outputPath).withCsvInput(new Schema(fields))
                            .withBlockSize(256).writtenBy("SDKS3Example").withPageSizeInMb(1).build();
                }
                index++;

                // decode the image to find its dimensions and channel depth
                File file = new File((String) files[i]);
                System.out.println(file.getCanonicalPath());
                int depth = 0;
                boolean isGray;
                boolean hasAlpha;
                BufferedImage bufferedImage = null;
                try {
                    bufferedImage = ImageIO.read(file);
                    isGray = bufferedImage.getColorModel().getColorSpace().getType() == ColorSpace.TYPE_GRAY;
                    hasAlpha = bufferedImage.getColorModel().hasAlpha();

                    if (isGray) {
                        depth = 1;
                    } else if (hasAlpha) {
                        depth = 4;
                    } else {
                        depth = 3;
                    }

                } catch (Exception e) {
                    // Default decoding failed: try each registered reader, preferring a gray
                    // destination type when the reader offers one.
                    e.printStackTrace();
                    System.out.println(i);
                    ImageInputStream stream = new FileImageInputStream(new File(file.getCanonicalPath()));
                    Exception lastException = null;
                    try {
                        Iterator<ImageReader> iter = ImageIO.getImageReaders(stream);

                        while (iter.hasNext()) {
                            ImageReader reader = null;
                            try {
                                reader = (ImageReader) iter.next();
                                ImageReadParam param = reader.getDefaultReadParam();
                                reader.setInput(stream, true, true);
                                Iterator<ImageTypeSpecifier> imageTypes = reader.getImageTypes(0);

                                while (imageTypes.hasNext()) {
                                    ImageTypeSpecifier imageTypeSpecifier = imageTypes.next();
                                    System.out
                                            .println(imageTypeSpecifier.getColorModel().getColorSpace().getType());
                                    int bufferedImageType = imageTypeSpecifier.getBufferedImageType();
                                    if (bufferedImageType == BufferedImage.TYPE_BYTE_GRAY) {
                                        param.setDestinationType(imageTypeSpecifier);
                                        break;
                                    }
                                }
                                bufferedImage = reader.read(0, param);
                                isGray = bufferedImage.getColorModel().getColorSpace()
                                        .getType() == ColorSpace.TYPE_GRAY;
                                hasAlpha = bufferedImage.getColorModel().hasAlpha();

                                if (isGray) {
                                    depth = 1;
                                } else if (hasAlpha) {
                                    depth = 4;
                                } else {
                                    depth = 3;
                                }
                                if (null != bufferedImage)
                                    break;
                            } catch (Exception e2) {
                                lastException = e2;
                            } finally {
                                if (null != reader)
                                    reader.dispose();
                            }
                        }
                    } finally {
                        // The fallback stream was previously leaked.
                        stream.close();
                    }
                    // If you don't have an image at the end of all readers
                    if (null == bufferedImage) {
                        if (null != lastException) {
                            throw lastException;
                        }
                    }
                }

                // This part used to live in a finally block, where a decode failure was masked
                // by a NullPointerException on bufferedImage; fail with a clear message instead.
                if (null == bufferedImage) {
                    throw new java.io.IOException("No image reader could decode " + file.getCanonicalPath());
                }

                // raw image bytes and annotation text
                originBinary = readFileFully(file);

                String txtFileName = file.getCanonicalPath().split(sufImage)[0] + sufAnnotation;
                String txtValue = new String(readFileFully(new File(txtFileName)), "UTF-8");

                // write data
                writer.write(new Object[] { bufferedImage.getHeight(), bufferedImage.getWidth(), depth,
                        file.getCanonicalPath(), originBinary, txtFileName, txtValue.replace("\n", "") });
            }
            writer.close();
        }
    }

    CarbonReader reader = CarbonReader.builder(outputPath).build();
    try {
        System.out.println("\nData:");
        int i = 0;
        while (i < 20 && reader.hasNext()) {
            Object[] row = (Object[]) reader.readNextRow();

            // NOTE(review): the binary column is read from row[1], not schema position 4;
            // presumably the reader returns columns in a different order - confirm.
            byte[] outputBinary = (byte[]) row[1];
            System.out.println(row[2] + " " + row[3] + " " + row[4] + " " + row[5] + " image size:"
                    + outputBinary.length + " " + row[0]);

            // save image, user can compare the save image and original image
            String destString = preDestPath + i + sufImage;
            BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destString));
            try {
                bos.write(outputBinary);
            } finally {
                bos.close();
            }
            i++;
        }
        System.out.println("\nFinished");
    } finally {
        reader.close();
    }
}

/**
 * Reads the whole content of {@code file} into a byte array and closes the stream.
 *
 * <p>The buffer is sized from {@link File#length()} and filled with an offset-tracking loop, so
 * empty files and short reads are handled (sizing by {@code available()} and looping on
 * {@code read(buf) != -1} never terminates for a zero-length buffer).
 */
private static byte[] readFileFully(File file) throws java.io.IOException {
    BufferedInputStream in = new BufferedInputStream(new FileInputStream(file));
    try {
        long size = file.length();
        if (size > Integer.MAX_VALUE) {
            throw new java.io.IOException("File too large to load into memory: " + file);
        }
        byte[] content = new byte[(int) size];
        int filled = 0;
        while (filled < content.length) {
            int n = in.read(content, filled, content.length - filled);
            if (n < 0) {
                throw new java.io.IOException("Unexpected end of file while reading " + file);
            }
            filled += n;
        }
        return content;
    } finally {
        in.close();
    }
}

From source file:org.jab.docsearch.spider.LinkFinder.java

/**
 * Spiders pages starting at {@code pageName}: downloads each pending URL to a temporary file,
 * extracts links from HTML while downloading, hands the file to the indexer and finally
 * reports a summary.
 *
 * <p>For every URL returned by {@code getNextUrlNo()} the method records size, last-modified
 * time, content type and MD5 on the matching {@code SpiderUrl}. Links are taken from
 * {@code <a>}, {@code <area>} and {@code <param name="url">} tags and passed to
 * {@code doPossibleAdd}. Any exception for a URL marks it as a dead link and the loop goes on
 * with the next one. The loop ends when no URL is left, when {@code ds} reports it is no longer
 * working, or when more than {@code maxLinksToFind} pages were spidered.
 */
public void getAllLinks() {
    // writes links from a page out to a file
    String urlStr = pageName;
    String shortUrl = "";
    numUnChanged = 0;
    numSkips = 0;
    int numSuccesses = 0;
    int numFailed = 0;
    int numNoRobots = 0;
    addLink(urlStr);
    domainUrl = Utils.getDomainURL(urlStr);
    if (logger.isDebugEnabled()) {
        logger.debug("getAllLinks() domain url='" + domainUrl + "'");
    }
    SpiderUrl curl = new SpiderUrl(urlStr);
    baseUrlFolder = Utils.getBaseURLFolder(urlStr);
    int curLinkNo = 0;
    boolean completedSpider = false;
    boolean isDead = false;
    int curPread = 0;
    if (ds != null) {
        ds.setIsWorking(true);
        ds.setProgressMax(maxLinksToFind);
        ds.setCurProgressMSG("Spidering Files...");
    }
    int numSpidered = 0;
    int curSuccessNo = 0;

    // start spider
    while (curLinkNo != -1) {
        BufferedInputStream urlStream = null;
        FileOutputStream fileOutStream = null;

        try {
            completedSpider = false;
            isDead = false;
            if (ds != null) {
                ds.setCurProgress(curPread);
                // stop when the owner cancelled the run
                if (!ds.getIsWorking()) {
                    break;
                }
            }
            curLinkNo = getNextUrlNo();
            if (curLinkNo == -1) {
                logger.debug("getAllLinks() end of links reached.");
                break;
            } else {
                urlStr = getLinkNameByNo(curLinkNo);
                logger.info("getAllLinks() analyzing page='" + urlStr + "'");
                curl = getSpiderUrl(curLinkNo);
            }

            shortUrl = Utils.concatEnd(urlStr, 33);
            setStatus(I18n.getString("connecting_to") + " " + shortUrl);

            // open url
            URL url = new URL(urlStr);
            URLConnection conn = url.openConnection();
            conn.setDoInput(true);
            conn.setUseCaches(false);
            conn.setRequestProperty("User-Agent", "DocSearcher " + I18n.getString("ds.version"));
            conn.connect();
            urlStream = new BufferedInputStream(conn.getInputStream());

            // filesize (-1 when the server did not send a content length)
            int fileSize = conn.getContentLength();
            if (fileSize > maxFileSizeToGet) {
                String ex = I18n.getString("skipping_file_too_big") + " (" + fileSize + " > " + maxFileSizeToGet
                        + ") " + shortUrl;
                setStatus(ex);
                throw new Exception(ex);
            }

            setStatus(I18n.getString("downloading_uc") + "... " + shortUrl + " " + fileSize + " "
                    + I18n.getString("bytes"));
            curl.setSize(fileSize);

            // last modified
            long curModified = conn.getLastModified(); // was .getDate();
            curl.setLastModified(curModified);

            // content type
            String curContentType = netUtils.getContentType(conn);
            curl.setContentType(curContentType);

            // build the value for downloadFile
            String dnldTmpName = getDownloadFileName(curl.getContentType(), urlStr.toLowerCase());
            String downloadFile = FileUtils.addFolder(downloadFileDir, dnldTmpName);

            // TODO it is better to use content type!
            boolean curIsWebPage = isHtml(urlStr.toLowerCase())
                    || (curContentType.toLowerCase().indexOf("html") != -1);

            logger.debug("getAllLinks() saving to " + downloadFile);
            fileOutStream = new FileOutputStream(downloadFile);
            int curSize = 0;
            int curI;
            int lastPercent = 0;
            StringBuilder tag = new StringBuilder();
            String link = null;
            boolean inTag = false;
            boolean getFileSizeFromStream = false;
            if (fileSize == -1) {
                getFileSizeFromStream = true;
            }

            // copy byte by byte so tags can be scanned while the page is saved
            while ((curI = urlStream.read()) != -1) {
                fileOutStream.write(curI);

                curSize++;
                if (ds != null) {
                    if (!ds.getIsWorking()) {
                        break;
                    }
                }

                // fix problem if filesize not in content length
                // (available() is only an estimate, so this is a running lower bound)
                if (getFileSizeFromStream) {
                    fileSize = curSize + urlStream.available();
                }

                // notify of download progress
                // fileSize > 0 guards the division: a server may declare a length of 0 and
                // still send a body. In that case the byte-count status below is used.
                if (fileSize > 0 && curSize > 0 && (curSize % 10) == 0) {
                    // long arithmetic: curSize * 100 overflows int beyond roughly 21 MB
                    int curPercent = (int) ((curSize * 100L) / fileSize);
                    if (curPercent != lastPercent) {
                        lastPercent = curPercent;
                        setStatus(I18n.getString("downloading_uc") + "... : (" + shortUrl + ") --> "
                                + curPercent + " %" + " ( " + (numSuccesses + numFailed + numNoRobots) + "/"
                                + getNumLinksFound() + ")");
                    }
                } // end for percent updates
                else if (curSize % 40 == 0) {
                    setStatus(I18n.getString("downloading_uc") + "... : (" + shortUrl + ") --> " + curSize + " "
                            + I18n.getString("bytes"));
                }

                // handle links
                if (curIsWebPage) {
                    char c = (char) curI;
                    // LOOK AT THE TAGS

                    // start tag
                    if (c == '<') {
                        inTag = true;
                        tag = new StringBuilder();
                    }
                    // end tag
                    else if (c == '>') {
                        inTag = false;
                        tag.append(c);
                        String realTag = tag.toString();
                        String lowerTag = realTag.toLowerCase();

                        // TODO fix problem with spaces before =

                        // link
                        if (lowerTag.startsWith("<a ")) {
                            link = Utils.getTagString("href=", realTag);
                            link = Utils.getNormalUrl(link);
                            doPossibleAdd(urlStr, link);
                        }
                        // area
                        else if (lowerTag.startsWith("<area")) {
                            link = Utils.getTagString("href=", realTag);
                            link = Utils.getNormalUrl(link);
                            doPossibleAdd(urlStr, link);
                        }
                        // TODO is in param realy a link?
                        else if (lowerTag.startsWith("<param")) {
                            String appletParam = Utils.getTagString("name=", realTag);
                            if (appletParam.toLowerCase().equals("url")) {
                                link = Utils.getTagString("value=", realTag);
                                link = Utils.getNormalUrl(link);
                                doPossibleAdd(urlStr, link);
                            }
                        }
                    }

                    // in tag
                    if (inTag) {
                        tag.append(c);
                    }
                }

                // filesize ok
                if (getFileSizeFromStream && fileSize > maxFileSizeToGet) {
                    break;
                }
            } // end while downloading
            curPread++;
            // close before hashing/indexing so the temp file is complete on disk
            fileOutStream.close();
            urlStream.close();
            curl.setMd5(FileUtils.getMD5Sum(downloadFile));

            // now add out document
            if (ds != null) {
                curSuccessNo = ds.idx.addDocToIndex(downloadFile, iw, dsi, false, curl);
                switch (curSuccessNo) {
                case 0: // good
                    numSuccesses++;
                    break;
                case 1: // bad
                    numFailed++;
                    break;
                case 2: // meta robots - no index
                    numNoRobots++;
                    break;
                }
            }

            // delete temp file
            if (!FileUtils.deleteFile(downloadFile)) {
                logger.warn("getAllLinks() can't delete file '" + downloadFile + "'");
            }

            numSpidered++;
            completedSpider = true;

            // max links found
            if (numSpidered > maxLinksToFind) {
                break;
            }
        } catch (Exception e) {
            // any failure marks the current URL as dead; the loop continues with the next URL
            logger.fatal("getAllLinks() failed", e);
            setStatus(I18n.getString("error") + " : " + e.toString());
            isDead = true;
        } finally {
            // close resources
            IOUtils.closeQuietly(urlStream);
            IOUtils.closeQuietly(fileOutStream);

            curl.setSpidered(completedSpider);
            curl.setIsDeadLink(isDead);
            setStatus(I18n.getString("download_complete") + " " + shortUrl);
        }
    } // end for iterating over links

    if (ds != null) {
        ds.resetProgress();
    }
    saveAllLinks();

    logger.info("getAllLinks() " + numSpidered + " total web pages spidered for links.");

    showMessage(I18n.getString("spidering_complete") + " (" + Utils.concatStrToEnd(pageName, 28) + ") ",
            numSpidered + " " + I18n.getString("documents_indexed") + " " + getNumLinksFound() + " "
                    + I18n.getString("links_found") + "\n\n" + numSuccesses + " "
                    + I18n.getString("documents_spidered_successful") + "\n\n" + numFailed + " "
                    + I18n.getString("documents_spidered_failed") + "\n\n" + numNoRobots + " "
                    + I18n.getString("documents_not_spidered"));
}

From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.dta.DTAFileReader.java

/**
 * Parses the value-label tables of a release-105 file from {@code stream} and stores them via
 * {@code smd.setValueLabelTable}.
 *
 * <p>Each table is a header ({@code value_label_table_length} bytes holding the number of
 * pairs, then the NUL-terminated table name) followed by all values (one short each) and then
 * all labels (8 bytes each, NUL-padded). At most {@code varQnty} tables are read; parsing stops
 * earlier when {@code stream.available()} reports 0 after a table.
 *
 * @param stream stream positioned at the first value-label table
 * @throws IllegalArgumentException if {@code stream} is null
 * @throws IOException if a header or table is missing, truncated or declares a negative
 *         number of pairs
 */
void parseValueLabelsRelease105(BufferedInputStream stream) throws IOException {

    dbgLog.fine("***** parseValueLabelsRelease105(): start *****");

    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }

    int nvar = (Integer) smd.getFileInformation().get("varQnty");
    int length_label_name = constantTable.get("NAME") + 1;
    // note: caution +1 as the null character, not 9 byte

    int length_value_label_header = value_label_table_length + length_label_name;

    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("value_label_table_length=" + value_label_table_length);
    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("length_value_label_header=" + length_value_label_header);

    int length_lable_name_field = 8;

    /*
    Seg  field         byte    type
    1-1. no of pairs      2    int  (= m)
    1-2. vlt_name        10    includes char+(\0) == name used in Sec2.part 5
     -----------------------------------
                         11
    2-1. values         2*n    int[]
    2-2. labels         8*n    char
    */

    for (int i = 0; i < nvar; i++) {
        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine("\n\n" + i + "th value-label table header");

        byte[] valueLabelHeader = new byte[length_value_label_header];

        // Part 1: reading the header of a value-label table if exists
        // A single read() may return fewer bytes than requested and returns -1 (never 0) at
        // end of stream, so the buffer is filled in a loop and the total is checked.
        int nbytes = readValueLabelBytes(stream, valueLabelHeader);

        if (nbytes == 0) {
            throw new IOException("reading value label header: no datum");
        }
        if (nbytes < length_value_label_header) {
            throw new IOException("reading value label header: unexpected end of stream (" + nbytes
                    + " of " + length_value_label_header + " bytes)");
        }

        // 1.1 number of value-label pairs in this table (= m)
        ByteBuffer bb_value_label_pairs = ByteBuffer.wrap(valueLabelHeader, 0, value_label_table_length);
        if (isLittleEndian) {
            bb_value_label_pairs.order(ByteOrder.LITTLE_ENDIAN);
            //if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("value lable table lenth: byte reversed");
        }
        int no_value_label_pairs = bb_value_label_pairs.getShort();

        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine("no_value_label_pairs=" + no_value_label_pairs);

        // A negative count would otherwise surface as NegativeArraySizeException below.
        if (no_value_label_pairs < 0) {
            throw new IOException("reading value label header: negative number of value-label pairs ("
                    + no_value_label_pairs + ")");
        }

        // 1.2 labelName
        String rawLabelName = new String(Arrays.copyOfRange(valueLabelHeader, value_label_table_length,
                (value_label_table_length + length_label_name)), "ISO-8859-1");

        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine("rawLabelName(length)=" + rawLabelName.length());
        // Tolerate a name without terminating NUL, the same way the labels are handled below.
        int labelNameEnd = rawLabelName.indexOf(0);
        String labelName = (labelNameEnd != -1) ? rawLabelName.substring(0, labelNameEnd) : rawLabelName;

        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine("label name = " + labelName + "\n");

        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine(i + "-th value-label table");
        // Part 2: reading the value-label table
        // the length of the value-label table is: 2*m + 8*m = 10*m
        int length_value_label_table = (value_label_table_length + length_lable_name_field)
                * no_value_label_pairs;

        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine("length_value_label_table=" + length_value_label_table);

        byte[] valueLabelTable_i = new byte[length_value_label_table];
        int noBytes = readValueLabelBytes(stream, valueLabelTable_i);
        // Also raised for a table that declares zero pairs, as before.
        if (noBytes == 0) {
            throw new IOException("reading value label table: no datum");
        }
        if (noBytes < length_value_label_table) {
            throw new IOException("reading value label table: unexpected end of stream (" + noBytes
                    + " of " + length_value_label_table + " bytes)");
        }

        // 2-1. 2-byte-integer array (2*m): value array (sorted)

        short[] valueList = new short[no_value_label_pairs];
        int offset_value = 0;

        for (int k = 0; k < no_value_label_pairs; k++) {

            ByteBuffer bb_value_list = ByteBuffer.wrap(valueLabelTable_i, offset_value,
                    value_label_table_length);
            if (isLittleEndian) {
                bb_value_list.order(ByteOrder.LITTLE_ENDIAN);
            }
            valueList[k] = bb_value_list.getShort();

            offset_value += value_label_table_length;
        }

        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine("value_list=" + Arrays.toString(valueList) + "\n");

        // 2-2. 8-byte chars that store label data (m units of labels)

        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine("current offset_value=" + offset_value);

        int offset_start = offset_value;
        int offset_end = offset_value + length_lable_name_field;
        String[] labelList = new String[no_value_label_pairs];

        for (int l = 0; l < no_value_label_pairs; l++) {

            String string_l = new String(Arrays.copyOfRange(valueLabelTable_i, offset_start, offset_end),
                    "ISO-8859-1");

            // cut the label at its first NUL, if any
            int null_position = string_l.indexOf(0);
            if (null_position != -1) {
                labelList[l] = string_l.substring(0, null_position);
            } else {
                labelList[l] = string_l;
            }

            offset_start = offset_end;
            offset_end += length_lable_name_field;
        }

        Map<String, String> tmpValueLabelTable = new LinkedHashMap<String, String>();

        for (int j = 0; j < no_value_label_pairs; j++) {
            if (dbgLog.isLoggable(Level.FINE))
                dbgLog.fine(j + "-th pair:" + valueList[j] + "[" + labelList[j] + "]");
            tmpValueLabelTable.put(Integer.toString(valueList[j]), labelList[j]);
        }
        valueLabelTable.put(labelName, tmpValueLabelTable);

        // NOTE(review): available() is only an estimate; 0 is treated as end of file here,
        // which presumably holds for file-backed streams - confirm for other sources.
        if (stream.available() == 0) {
            // reached the end of this file
            // do exit-processing
            if (dbgLog.isLoggable(Level.FINE))
                dbgLog.fine("***** reached the end of the file at " + i + "th value-label Table *****");
            break;
        }

    } // for-loop

    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("valueLabelTable:\n" + valueLabelTable);

    smd.setValueLabelTable(valueLabelTable);

    dbgLog.fine("***** parseValueLabelsRelease105(): end *****");

}

/**
 * Fills {@code buffer} from {@code stream}, repeating short reads until the buffer is full or
 * the stream ends.
 *
 * @return the number of bytes actually stored; 0 for an empty buffer or at end of stream
 */
private static int readValueLabelBytes(BufferedInputStream stream, byte[] buffer) throws IOException {
    int total = 0;
    while (total < buffer.length) {
        int n = stream.read(buffer, total, buffer.length - total);
        if (n < 0) {
            break;
        }
        total += n;
    }
    return total;
}

From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.dta.DTAFileReader.java

/**
 * Parses the value-label tables of a release-105 file from {@code stream} and attaches the
 * resulting categories to the variables of {@code dataTable}.
 *
 * <p>Each table is a header ({@code value_label_table_length} bytes holding the number of
 * pairs, then the NUL-terminated table name) followed by all values (one short each) and then
 * all labels (8 bytes each, NUL-padded). At most {@code nvar} tables are read; parsing stops
 * earlier when {@code stream.available()} reports 0 after a table. Afterwards every variable
 * whose {@code valueLabelsLookupTable} entry names a parsed table receives one
 * {@code VariableCategory} per value/label pair.
 *
 * @param stream stream positioned at the first value-label table
 * @throws IllegalArgumentException if {@code stream} is null
 * @throws IOException if a header or table is missing, truncated or declares a negative
 *         number of pairs
 */
void parseValueLabelsRelease105(BufferedInputStream stream) throws IOException {

    dbgLog.fine("parseValueLabelsRelease105(): start");

    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }

    int nvar = dataTable.getVarQuantity().intValue();
    int length_label_name = constantTable.get("NAME") + 1;
    // note: caution +1 as the null character, not 9 byte

    int length_value_label_header = value_label_table_length + length_label_name;

    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("value_label_table_length=" + value_label_table_length);
    }
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("length_value_label_header=" + length_value_label_header);
    }

    int length_lable_name_field = 8;

    /*
     Seg  field         byte    type
     1-1. no of pairs      2    int  (= m)
     1-2. vlt_name        10    includes char+(\0) == name used in Sec2.part 5
     -----------------------------------
     11
     2-1. values         2*n    int[]
     2-2. labels         8*n    char
     */

    // This map will hold a temporary lookup table for all the categorical
    // value-label groups we are going to find here:
    // These groups have unique names, and a group *may be shared* between
    // multiple variables. In the method decodeDescriptorValueLabel above
    // we have populated a lookup table where variables are linked to the 
    // corresponding value-label groups by name. Thus we must fully populate 
    // the full map of all the variable groups, then go through the list 
    // of variables and create the dataverse variable categories from 
    // them. -- L.A. 4.0       
    Map<String, Map<String, String>> tempValueLabelTable = new LinkedHashMap<>();

    for (int i = 0; i < nvar; i++) {
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("\n\n" + i + "th value-label table header");
        }

        byte[] valueLabelHeader = new byte[length_value_label_header];

        // Part 1: reading the header of a value-label table if exists
        // A single read() may return fewer bytes than requested and returns -1 (never 0) at
        // end of stream, so the buffer is filled in a loop and the total is checked.
        int nbytes = readValueLabelBytes(stream, valueLabelHeader);

        if (nbytes == 0) {
            throw new IOException("reading value label header: no datum");
        }
        if (nbytes < length_value_label_header) {
            throw new IOException("reading value label header: unexpected end of stream (" + nbytes
                    + " of " + length_value_label_header + " bytes)");
        }

        // 1.1 number of value-label pairs in this table (= m)
        ByteBuffer bb_value_label_pairs = ByteBuffer.wrap(valueLabelHeader, 0, value_label_table_length);
        if (isLittleEndian) {
            bb_value_label_pairs.order(ByteOrder.LITTLE_ENDIAN);
            //if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("value lable table lenth: byte reversed");
        }
        int no_value_label_pairs = bb_value_label_pairs.getShort();

        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("no_value_label_pairs=" + no_value_label_pairs);
        }

        // A negative count would otherwise surface as NegativeArraySizeException below.
        if (no_value_label_pairs < 0) {
            throw new IOException("reading value label header: negative number of value-label pairs ("
                    + no_value_label_pairs + ")");
        }

        // 1.2 labelName
        String rawLabelName = new String(Arrays.copyOfRange(valueLabelHeader, value_label_table_length,
                (value_label_table_length + length_label_name)), "ISO-8859-1");

        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("rawLabelName(length)=" + rawLabelName.length());
        }
        // Tolerate a name without terminating NUL, the same way the labels are handled below.
        int labelNameEnd = rawLabelName.indexOf(0);
        String labelName = (labelNameEnd != -1) ? rawLabelName.substring(0, labelNameEnd) : rawLabelName;

        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("label name = " + labelName + "\n");
        }

        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine(i + "-th value-label table");
        }
        // Part 2: reading the value-label table
        // the length of the value-label table is: 2*m + 8*m = 10*m
        int length_value_label_table = (value_label_table_length + length_lable_name_field)
                * no_value_label_pairs;

        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("length_value_label_table=" + length_value_label_table);
        }

        byte[] valueLabelTable_i = new byte[length_value_label_table];
        int noBytes = readValueLabelBytes(stream, valueLabelTable_i);
        // Also raised for a table that declares zero pairs, as before.
        if (noBytes == 0) {
            throw new IOException("reading value label table: no datum");
        }
        if (noBytes < length_value_label_table) {
            throw new IOException("reading value label table: unexpected end of stream (" + noBytes
                    + " of " + length_value_label_table + " bytes)");
        }

        // 2-1. 2-byte-integer array (2*m): value array (sorted)
        short[] valueList = new short[no_value_label_pairs];
        int offset_value = 0;

        for (int k = 0; k < no_value_label_pairs; k++) {

            ByteBuffer bb_value_list = ByteBuffer.wrap(valueLabelTable_i, offset_value,
                    value_label_table_length);
            if (isLittleEndian) {
                bb_value_list.order(ByteOrder.LITTLE_ENDIAN);
            }
            valueList[k] = bb_value_list.getShort();

            offset_value += value_label_table_length;
        }

        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("value_list=" + Arrays.toString(valueList) + "\n");
        }

        // 2-2. 8-byte chars that store label data (m units of labels)
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("current offset_value=" + offset_value);
        }

        int offset_start = offset_value;
        int offset_end = offset_value + length_lable_name_field;
        String[] labelList = new String[no_value_label_pairs];

        for (int l = 0; l < no_value_label_pairs; l++) {

            String string_l = new String(Arrays.copyOfRange(valueLabelTable_i, offset_start, offset_end),
                    "ISO-8859-1");

            // cut the label at its first NUL, if any
            int null_position = string_l.indexOf(0);
            if (null_position != -1) {
                labelList[l] = string_l.substring(0, null_position);
            } else {
                labelList[l] = string_l;
            }

            offset_start = offset_end;
            offset_end += length_lable_name_field;
        }

        // Finally, we've reached the actual value-label pairs. We'll go 
        // through them and put them on the temporary lookup map: 

        Map<String, String> pairsOfThisTable = new LinkedHashMap<>();
        tempValueLabelTable.put(labelName, pairsOfThisTable);

        for (int j = 0; j < no_value_label_pairs; j++) {
            if (dbgLog.isLoggable(Level.FINE)) {
                dbgLog.fine(j + "-th pair:" + valueList[j] + "[" + labelList[j] + "]");
            }

            // TODO: do we need any null/empty string checks here? -- L.A. 4.0
            pairsOfThisTable.put(Integer.toString(valueList[j]), labelList[j]);
        }

        // NOTE(review): available() is only an estimate; 0 is treated as end of file here,
        // which presumably holds for file-backed streams - confirm for other sources.
        if (stream.available() == 0) {
            // reached the end of the file
            if (dbgLog.isLoggable(Level.FINE)) {
                dbgLog.fine("reached the end of file at " + i + "th value-label Table.");
            }
            break;
        }

    } // for nvar loop

    // And now we can go through the list of variables, see if any have 
    // value-label groups linked, then build dataverse VariableCategory 
    // objects for them, using the values stored in the temporary map 
    // we've just built:

    for (int i = 0; i < nvar; i++) {
        if (valueLabelsLookupTable[i] == null) {
            continue;
        }
        Map<String, String> linkedGroup = tempValueLabelTable.get(valueLabelsLookupTable[i]);
        if (linkedGroup == null) {
            // What if it is null? -- is it a legit condition, that 
            // a variable was advertised as having categorical values,
            // but no such cat value group exists under this name?
            // -- L.A.
            continue;
        }
        for (Map.Entry<String, String> pair : linkedGroup.entrySet()) {
            VariableCategory cat = new VariableCategory();

            cat.setValue(pair.getKey());
            cat.setLabel(pair.getValue());

            /* cross-link the variable and category to each other: */
            cat.setDataVariable(dataTable.getDataVariables().get(i));
            dataTable.getDataVariables().get(i).getCategories().add(cat);
        }
    }

    dbgLog.fine("parseValueLabelsRelease105(): end");

}

/**
 * Fills {@code buffer} from {@code stream}, repeating short reads until the buffer is full or
 * the stream ends.
 *
 * @return the number of bytes actually stored; 0 for an empty buffer or at end of stream
 */
private static int readValueLabelBytes(BufferedInputStream stream, byte[] buffer) throws IOException {
    int total = 0;
    while (total < buffer.length) {
        int n = stream.read(buffer, total, buffer.length - total);
        if (n < 0) {
            break;
        }
        total += n;
    }
    return total;
}

From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.dta.DTAFileReader.java

/**
 * Reads the value-label tables from a DTA stream and stores them in
 * {@code valueLabelTable} (table name -> (value -> label)), then hands that
 * map to {@code smd.setValueLabelTable}.
 *
 * <p>At most {@code varQnty} tables are read; the loop stops earlier when
 * {@code stream.available()} reports that nothing is left.
 *
 * @param stream the input positioned at the first value-label table header
 * @throws IllegalArgumentException if {@code stream} is {@code null}
 * @throws IOException if the stream ends in the middle of a table, or if a
 *         table declares sizes or label offsets that do not fit inside it
 */
private void parseValueLabelsReleasel108(BufferedInputStream stream) throws IOException {

    dbgLog.fine("***** parseValueLabelsRelease108(): start *****");

    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }

    int nvar = (Integer) smd.getFileInformation().get("varQnty");
    int length_label_name = constantTable.get("NAME");
    int length_value_label_header = value_label_table_length + length_label_name
            + VALUE_LABEL_HEADER_PADDING_LENGTH;
    // Segment 2 always starts with two integers: n (2-1) and m (2-2).
    int length_fixed_fields = 2 * value_label_table_length;

    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("value_label_table_length=" + value_label_table_length);
        dbgLog.fine("length_value_label_header=" + length_value_label_header);
    }
    /*
    Seg  field         byte    type
    1-1. len_vlt(Seg.2)   4    int
    1-2. vlt_name      9/33    char+(\0) == name used in Sec2.part 5
    1-3. padding          3    byte
     -----------------------------------
                      16/40
    2-1. n(# of vls)      4    int
    2-2. m(len_labels)    4    int
    2-3. label_offsets    4*n  int[]
    2-4. values           4*n  int[]
    2-5. labels           m    char
    */

    for (int i = 0; i < nvar; i++) {
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("\n\n" + i + "th value-label table header");
        }

        // Part 1: reading the header of a value-label table.
        // A single read() call may legally return fewer bytes than asked
        // for (or -1 at end of stream), so the buffer is filled in a loop.
        byte[] valueLabelHeader = new byte[length_value_label_header];
        readValueLabelBytes(stream, valueLabelHeader, "value label header");

        // 1.1 length_value_label_table
        ByteBuffer bb_value_label_header = ByteBuffer.wrap(valueLabelHeader, 0, value_label_table_length);
        if (isLittleEndian) {
            bb_value_label_header.order(ByteOrder.LITTLE_ENDIAN);
        }
        int length_value_label_table = bb_value_label_header.getInt();

        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("length of this value-label table=" + length_value_label_table);
        }

        // 1.2 labelName
        String rawLabelName = new String(Arrays.copyOfRange(valueLabelHeader, value_label_table_length,
                (value_label_table_length + length_label_name)), "ISO-8859-1");
        String labelName = getNullStrippedString(rawLabelName);

        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("label name = " + labelName + "\n");
            dbgLog.fine(i + "-th value-label table");
        }

        // The table must at least hold the n and m fields; this also
        // rejects negative lengths before they reach the array allocation.
        if (length_value_label_table < length_fixed_fields) {
            throw new IOException("reading value label table '" + labelName + "': invalid table length "
                    + length_value_label_table);
        }

        // Part 2: reading the value-label table
        byte[] valueLabelTable_i = new byte[length_value_label_table];
        readValueLabelBytes(stream, valueLabelTable_i, "value label table");

        // One buffer over the whole table; fields are fetched with absolute reads.
        ByteBuffer bb_table = ByteBuffer.wrap(valueLabelTable_i);
        if (isLittleEndian) {
            bb_table.order(ByteOrder.LITTLE_ENDIAN);
        }

        // 2-1. 4-byte-integer: number of units in this table (n)
        int no_value_label_pairs = bb_table.getInt(0);

        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("no_value_label_pairs=" + no_value_label_pairs);
        }

        // 2-2. 4-byte-integer: length of the label section (m bytes)
        int length_label_segment = bb_table.getInt(value_label_table_length);

        // Check the declared sizes against the bytes actually read, using
        // long arithmetic so that hostile values cannot overflow.
        long labels_start = length_fixed_fields + 2L * no_value_label_pairs * value_label_table_length;
        if (no_value_label_pairs < 0 || length_label_segment < 0
                || labels_start + length_label_segment > length_value_label_table) {
            throw new IOException("reading value label table '" + labelName + "': declared sizes (n="
                    + no_value_label_pairs + ", m=" + length_label_segment
                    + ") do not fit into a table of " + length_value_label_table + " bytes");
        }

        // 2-3. 4-byte-integer array (4xn): byte offsets of the label
        // strings within the label section (2-5).
        int[] label_offsets = new int[no_value_label_pairs];
        int byte_offset = length_fixed_fields;
        boolean logFine = dbgLog.isLoggable(Level.FINE);

        for (int j = 0; j < no_value_label_pairs; j++) {
            // note: 4-byte signed, not java's long
            label_offsets[j] = bb_table.getInt(byte_offset);
            if (logFine) {
                if (isLittleEndian) {
                    dbgLog.fine("label offset: byte reversed");
                }
                dbgLog.fine("label offset [" + j + "]: " + label_offsets[j]);
            }
            byte_offset += value_label_table_length;
        }

        // 2-4. 4-byte-integer array (4xn): value array (sorted)
        dbgLog.fine("value array");

        int[] valueList = new int[no_value_label_pairs];
        int offset_value = byte_offset;

        for (int k = 0; k < no_value_label_pairs; k++) {
            valueList[k] = bb_table.getInt(offset_value);
            offset_value += value_label_table_length;
        }

        // 2-5. m-byte chars that store label data
        String label_segment = new String(
                Arrays.copyOfRange(valueLabelTable_i, offset_value, (length_label_segment + offset_value)),
                "ISO-8859-1");

        // The labels are not assumed to be stored in the same order as the
        // values: each label is located through its offset from section
        // 2-3 and runs up to the next NUL (or the end of the segment).
        Map<String, String> tmpValueLabelTable = new LinkedHashMap<String, String>();

        for (int l = 0; l < no_value_label_pairs; l++) {
            int lblOffset = label_offsets[l];
            if (lblOffset < 0 || lblOffset > label_segment.length()) {
                throw new IOException("reading value label table '" + labelName + "': label offset "
                        + lblOffset + " is outside the label segment of " + label_segment.length()
                        + " bytes");
            }

            int nullIndx = label_segment.indexOf('\000', lblOffset);
            String lblString = (nullIndx > -1) ? label_segment.substring(lblOffset, nullIndx)
                    : label_segment.substring(lblOffset);

            if (logFine) {
                dbgLog.fine(l + "-th pair:" + valueList[l] + "[" + lblString + "]");
            }

            tmpValueLabelTable.put(Integer.toString(valueList[l]), lblString);
        }

        valueLabelTable.put(labelName, tmpValueLabelTable);

        // NOTE(review): available() is only an estimate of the bytes that
        // can be read without blocking; it is used here as the end-of-file
        // signal, which presumably holds for the file-backed streams this
        // reader is given -- confirm before using other stream sources.
        if (stream.available() == 0) {
            // reached the end of this file
            // do exit-processing
            dbgLog.fine("***** reached the end of the file at " + i + "th value-label Table *****");
            break;
        }

    } // for loop
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("valueLabelTable:\n" + valueLabelTable);
    }

    smd.setValueLabelTable(valueLabelTable);

    dbgLog.fine("***** parseValueLabelsRelease108(): end *****");
}

/**
 * Fills {@code buffer} completely from {@code stream}, issuing as many
 * reads as it takes.
 *
 * @param stream the stream to read from
 * @param buffer the array to fill from index 0 to its end
 * @param what   short description of the item being read, used in the error message
 * @throws IOException if the stream ends before {@code buffer} is full
 */
private void readValueLabelBytes(BufferedInputStream stream, byte[] buffer, String what)
        throws IOException {
    int total = 0;
    while (total < buffer.length) {
        int count = stream.read(buffer, total, buffer.length - total);
        if (count < 0) {
            throw new IOException("reading " + what + ": unexpected end of stream after " + total
                    + " of " + buffer.length + " bytes");
        }
        total += count;
    }
}

From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.dta.DTAFileReader.java

/**
 * Reads the value-label tables from a DTA stream and turns them into
 * {@code VariableCategory} objects attached to the data variables of
 * {@code dataTable}.
 *
 * <p>The tables are first collected into a temporary map keyed by group
 * name, because one group may be shared between several variables. Then
 * every variable that has an entry in {@code valueLabelsLookupTable} gets
 * one category per value-label pair of its group.
 *
 * <p>At most {@code nvar} tables are read; the loop stops earlier when
 * {@code stream.available()} reports that nothing is left.
 *
 * @param stream the input positioned at the first value-label table header
 * @throws IllegalArgumentException if {@code stream} is {@code null}
 * @throws IOException if the stream ends in the middle of a table, or if a
 *         table declares sizes or label offsets that do not fit inside it
 */
private void parseValueLabelsReleasel108(BufferedInputStream stream) throws IOException {

    dbgLog.fine("parseValueLabelsRelease108(): start");

    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }

    int nvar = dataTable.getVarQuantity().intValue();
    int length_label_name = constantTable.get("NAME");
    int length_value_label_header = value_label_table_length + length_label_name
            + VALUE_LABEL_HEADER_PADDING_LENGTH;
    // Segment 2 always starts with two integers: n (2-1) and m (2-2).
    int length_fixed_fields = 2 * value_label_table_length;

    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("value_label_table_length=" + value_label_table_length);
        dbgLog.fine("length_value_label_header=" + length_value_label_header);
    }
    /*
     Seg  field         byte    type
     1-1. len_vlt(Seg.2)   4    int
     1-2. vlt_name      9/33    char+(\0) == name used in Sec2.part 5
     1-3. padding          3    byte
     -----------------------------------
     16/40
     2-1. n(# of vls)      4    int
     2-2. m(len_labels)    4    int
     2-3. label_offsets    4*n  int[]
     2-4. values           4*n  int[]
     2-5. labels           m    char
     */

    // This map holds a temporary lookup table for all the categorical
    // value-label groups. The groups have unique names, and a group *may
    // be shared* between multiple variables, so the whole map is built
    // first and the per-variable categories are created from it afterwards.
    // -- L.A. 4.0

    Map<String, Map<String, String>> tempValueLabelTable = new LinkedHashMap<>();

    for (int i = 0; i < nvar; i++) {
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("\n\n" + i + "th value-label table header");
        }

        // Part 1: reading the header of a value-label table.
        // A single read() call may legally return fewer bytes than asked
        // for (or -1 at end of stream), so the buffer is filled in a loop.
        byte[] valueLabelHeader = new byte[length_value_label_header];
        readValueLabelBytes(stream, valueLabelHeader, "value label header");

        // 1.1 length_value_label_table
        ByteBuffer bb_value_label_header = ByteBuffer.wrap(valueLabelHeader, 0, value_label_table_length);
        if (isLittleEndian) {
            bb_value_label_header.order(ByteOrder.LITTLE_ENDIAN);
        }
        int length_value_label_table = bb_value_label_header.getInt();

        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("length of this value-label table=" + length_value_label_table);
        }

        // 1.2 labelName
        String rawLabelName = new String(Arrays.copyOfRange(valueLabelHeader, value_label_table_length,
                (value_label_table_length + length_label_name)), "ISO-8859-1");
        String labelName = getNullStrippedString(rawLabelName);

        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("label name = " + labelName + "\n");
            dbgLog.fine(i + "-th value-label table");
        }

        // The table must at least hold the n and m fields; this also
        // rejects negative lengths before they reach the array allocation.
        if (length_value_label_table < length_fixed_fields) {
            throw new IOException("reading value label table '" + labelName + "': invalid table length "
                    + length_value_label_table);
        }

        // Part 2: reading the value-label table
        byte[] valueLabelTable_i = new byte[length_value_label_table];
        readValueLabelBytes(stream, valueLabelTable_i, "value label table");

        // One buffer over the whole table; fields are fetched with absolute reads.
        ByteBuffer bb_table = ByteBuffer.wrap(valueLabelTable_i);
        if (isLittleEndian) {
            bb_table.order(ByteOrder.LITTLE_ENDIAN);
        }

        // 2-1. 4-byte-integer: number of units in this table (n)
        int no_value_label_pairs = bb_table.getInt(0);

        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("no_value_label_pairs=" + no_value_label_pairs);
        }

        // 2-2. 4-byte-integer: length of the label section (m bytes)
        int length_label_segment = bb_table.getInt(value_label_table_length);

        // Check the declared sizes against the bytes actually read, using
        // long arithmetic so that hostile values cannot overflow.
        long labels_start = length_fixed_fields + 2L * no_value_label_pairs * value_label_table_length;
        if (no_value_label_pairs < 0 || length_label_segment < 0
                || labels_start + length_label_segment > length_value_label_table) {
            throw new IOException("reading value label table '" + labelName + "': declared sizes (n="
                    + no_value_label_pairs + ", m=" + length_label_segment
                    + ") do not fit into a table of " + length_value_label_table + " bytes");
        }

        // 2-3. 4-byte-integer array (4xn): byte offsets of the label
        // strings within the label section (2-5).
        int[] label_offsets = new int[no_value_label_pairs];
        int byte_offset = length_fixed_fields;
        boolean logFine = dbgLog.isLoggable(Level.FINE);

        for (int j = 0; j < no_value_label_pairs; j++) {
            // note: 4-byte signed, not java's long
            label_offsets[j] = bb_table.getInt(byte_offset);
            if (logFine) {
                if (isLittleEndian) {
                    dbgLog.fine("label offset: byte reversed");
                }
                dbgLog.fine("label offset [" + j + "]: " + label_offsets[j]);
            }
            byte_offset += value_label_table_length;
        }

        // 2-4. 4-byte-integer array (4xn): value array (sorted)
        dbgLog.fine("value array");

        int[] valueList = new int[no_value_label_pairs];
        int offset_value = byte_offset;

        for (int k = 0; k < no_value_label_pairs; k++) {
            valueList[k] = bb_table.getInt(offset_value);
            offset_value += value_label_table_length;
        }

        // 2-5. m-byte chars that store label data
        String label_segment = new String(
                Arrays.copyOfRange(valueLabelTable_i, offset_value, (length_label_segment + offset_value)),
                "ISO-8859-1");

        // The labels are not assumed to be stored in the same order as the
        // values: each label is located through its offset from section
        // 2-3 and runs up to the next NUL (or the end of the segment).
        // The pairs go onto the temporary lookup map under the group name.
        Map<String, String> groupLabels = new LinkedHashMap<>();

        for (int l = 0; l < no_value_label_pairs; l++) {
            int lblOffset = label_offsets[l];
            if (lblOffset < 0 || lblOffset > label_segment.length()) {
                throw new IOException("reading value label table '" + labelName + "': label offset "
                        + lblOffset + " is outside the label segment of " + label_segment.length()
                        + " bytes");
            }

            int nullIndx = label_segment.indexOf('\000', lblOffset);
            String lblString = (nullIndx > -1) ? label_segment.substring(lblOffset, nullIndx)
                    : label_segment.substring(lblOffset);

            if (logFine) {
                dbgLog.fine(l + "-th pair:" + valueList[l] + "[" + lblString + "]");
            }

            // TODO: do we need any null/empty string checks here? -- L.A. 4.0
            groupLabels.put(Integer.toString(valueList[l]), lblString);
        }

        tempValueLabelTable.put(labelName, groupLabels);

        // NOTE(review): available() is only an estimate of the bytes that
        // can be read without blocking; it is used here as the end-of-file
        // signal, which presumably holds for the file-backed streams this
        // reader is given -- confirm before using other stream sources.
        if (stream.available() == 0) {
            // reached the end of the file
            dbgLog.fine("reached the end of the file at " + i + "th value-label Table");
            break;
        }

    } // for nvar loop

    // And now we can go through the list of variables, see if any have
    // value-label groups linked, then build dataverse VariableCategory
    // objects for them, using the values stored in the temporary map
    // we've just built:

    // TODO: this code is duplicated between this, and the "105 version" of
    // this method. Maybe it should be isolated in its own method.
    // -- L.A. 4.0
    for (int i = 0; i < nvar; i++) {
        if (valueLabelsLookupTable[i] == null) {
            continue;
        }

        Map<String, String> groupLabels = tempValueLabelTable.get(valueLabelsLookupTable[i]);
        if (groupLabels == null) {
            // What if it is null? -- is it a legit condition, that
            // a variable was advertised as having categorical values,
            // but no such cat value group exists under this name?
            // -- L.A.
            continue;
        }

        for (Map.Entry<String, String> pair : groupLabels.entrySet()) {
            VariableCategory cat = new VariableCategory();

            cat.setValue(pair.getKey());
            cat.setLabel(pair.getValue());

            /* cross-link the variable and category to each other: */
            cat.setDataVariable(dataTable.getDataVariables().get(i));
            dataTable.getDataVariables().get(i).getCategories().add(cat);
        }
    }

    dbgLog.fine("parseValueLabelsRelease108(): end");
}

/**
 * Fills {@code buffer} completely from {@code stream}, issuing as many
 * reads as it takes.
 *
 * @param stream the stream to read from
 * @param buffer the array to fill from index 0 to its end
 * @param what   short description of the item being read, used in the error message
 * @throws IOException if the stream ends before {@code buffer} is full
 */
private void readValueLabelBytes(BufferedInputStream stream, byte[] buffer, String what)
        throws IOException {
    int total = 0;
    while (total < buffer.length) {
        int count = stream.read(buffer, total, buffer.length - total);
        if (count < 0) {
            throw new IOException("reading " + what + ": unexpected end of stream after " + total
                    + " of " + buffer.length + " bytes");
        }
        total += count;
    }
}

From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SAVFileReader.java

void decodeRecordTypeDataUnCompressed(BufferedInputStream stream) throws IOException {
    dbgLog.fine("***** decodeRecordTypeDataUnCompressed(): start *****");

    if (stream == null) {
        throw new IllegalArgumentException("decodeRecordTypeDataUnCompressed: stream == null!");
    }//  w w w.j a v a 2  s .  c o m

    int varQnty = dataTable.getVarQuantity().intValue();

    // 
    // set-up tab file

    PrintWriter pwout = createOutputWriter(stream);

    boolean hasStringVarContinuousBlock = obsNonVariableBlockSet.size() > 0 ? true : false;
    dbgLog.fine("hasStringVarContinuousBlock=" + hasStringVarContinuousBlock);

    int ii = 0;

    int OBS = LENGTH_SAV_OBS_BLOCK;
    int nOBS = OBSUnitsPerCase;

    dbgLog.fine("OBSUnitsPerCase=" + OBSUnitsPerCase);

    int caseIndex = 0;

    dbgLog.fine("printFormatTable:\n" + printFormatTable);

    variableFormatTypeList = new String[varQnty];
    dateFormatList = new String[varQnty];

    for (int i = 0; i < varQnty; i++) {
        variableFormatTypeList[i] = SPSSConstants.FORMAT_CATEGORY_TABLE
                .get(printFormatTable.get(variableNameList.get(i)));
        dbgLog.fine("i=" + i + "th variableFormatTypeList=" + variableFormatTypeList[i]);
        formatCategoryTable.put(variableNameList.get(i), variableFormatTypeList[i]);
    }
    dbgLog.fine("variableFormatType:\n" + Arrays.deepToString(variableFormatTypeList));
    dbgLog.fine("formatCategoryTable:\n" + formatCategoryTable);

    int numberOfDecimalVariables = 0;

    // TODO: 
    // Make sure the date formats are actually preserved! 
    // (this is something that was collected in the code below and passed
    // to the UNF calculator). 
    // -- L.A. 4.0 alpha

    List<String> casewiseRecordForTabFile = new ArrayList<String>();

    // missing values are written to the tab-delimited file by
    // using the default or user-specified missing-value  strings;
    // however, to calculate UNF/summary statistics,
    // classes for these calculations require their specific 
    // missing values that differ from the above missing-value
    // strings; therefore, after row data for the tab-delimited 
    // file are written, missing values in a row are changed to
    // UNF/summary-statistics-OK ones.

    // data-storage object for sumStat
    ///dataTable2 = new Object[varQnty][caseQnty];
    // storage of date formats to pass to UNF   
    ///dateFormats = new String[varQnty][caseQnty];

    try {
        for (int i = 0;; i++) { // case-wise loop

            byte[] buffer = new byte[OBS * nOBS];

            int nbytesuc = stream.read(buffer);

            StringBuilder sb_stringStorage = new StringBuilder("");

            for (int k = 0; k < nOBS; k++) {
                int offset = OBS * k;

                // uncompressed case
                // numeric missing value == sysmis
                // FF FF FF FF FF FF eF FF(little endian)
                // string missing value
                // 20 20 20 20 20 20 20 20
                // cf: compressed case 
                // numeric type:sysmis == 0xFF
                // string type: missing value == 0xFE
                // 

                boolean isNumeric = OBSwiseTypelList.get(k) == 0 ? true : false;

                if (isNumeric) {
                    dbgLog.finer(k + "-th variable is numeric");
                    // interprete as double
                    ByteBuffer bb_double = ByteBuffer.wrap(buffer, offset, LENGTH_SAV_OBS_BLOCK);
                    if (isLittleEndian) {
                        bb_double.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    //char[] hexpattern =
                    String dphex = new String(Hex.encodeHex(
                            Arrays.copyOfRange(bb_double.array(), offset, offset + LENGTH_SAV_OBS_BLOCK)));
                    dbgLog.finer("dphex=" + dphex);

                    if ((dphex.equals("ffffffffffffefff")) || (dphex.equals("ffefffffffffffff"))) {
                        //casewiseRecordForTabFile.add(systemMissingValue);
                        // add the numeric missing value
                        dbgLog.fine("SAV Reader: adding: Missing Value (numeric)");
                        casewiseRecordForTabFile.add(MissingValueForTextDataFileNumeric);
                    } else {
                        Double ddatum = bb_double.getDouble();
                        dbgLog.fine("SAV Reader: adding: ddatum=" + ddatum);

                        // add this non-missing-value numeric datum
                        casewiseRecordForTabFile.add(doubleNumberFormatter.format(ddatum));
                    }

                } else {
                    dbgLog.finer(k + "-th variable is string");
                    // string case
                    // strip space-padding
                    // do not trim: string might have spaces within it
                    // the missing value (hex) for a string variable is:
                    // "20 20 20 20 20 20 20 20"

                    String strdatum = new String(
                            Arrays.copyOfRange(buffer, offset, (offset + LENGTH_SAV_OBS_BLOCK)),
                            defaultCharSet);
                    dbgLog.finer("str_datum=" + strdatum);
                    // add this non-missing-value string datum 
                    casewiseRecordForTabFile.add(strdatum);

                } // if isNumeric

            } // k-loop

            // String-variable's continuous block exits:
            if (hasStringVarContinuousBlock) {
                // continuous blocks: string case
                // concatenating process
                //dbgLog.fine("concatenating process starts");

                //dbgLog.fine("casewiseRecordForTabFile(before)="+casewiseRecordForTabFile);
                //dbgLog.fine("casewiseRecordForTabFile(before:size)="+casewiseRecordForTabFile.size());

                StringBuilder sb = new StringBuilder("");
                int firstPosition = 0;

                Set<Integer> removeJset = new HashSet<Integer>();
                for (int j = 0; j < nOBS; j++) {
                    dbgLog.finer("j=" + j + "-th type =" + OBSwiseTypelList.get(j));
                    if (OBSwiseTypelList.get(j) == -1) {
                        // String continued fount at j-th 
                        // look back the j-1 
                        firstPosition = j - 1;
                        int lastJ = j;
                        String concatanated = null;

                        removeJset.add(j);
                        sb.append(casewiseRecordForTabFile.get(j - 1));
                        sb.append(casewiseRecordForTabFile.get(j));
                        for (int jc = 1;; jc++) {
                            if (OBSwiseTypelList.get(j + jc) != -1) {
                                // j is the end unit of this string variable
                                concatanated = sb.toString();
                                sb.setLength(0);
                                lastJ = j + jc;
                                break;
                            } else {
                                sb.append(casewiseRecordForTabFile.get(j + jc));
                                removeJset.add(j + jc);
                            }
                        }
                        casewiseRecordForTabFile.set(j - 1, concatanated);

                        //out.println(j-1+"th concatanated="+concatanated);
                        j = lastJ - 1;

                    } // end-of-if: continuous-OBS only
                } // end of loop-j

                List<String> newDataLine = new ArrayList<String>();

                for (int jl = 0; jl < casewiseRecordForTabFile.size(); jl++) {
                    //out.println("jl="+jl+"-th datum =["+casewiseRecordForTabFile.get(jl)+"]");

                    if (!removeJset.contains(jl)) {
                        newDataLine.add(casewiseRecordForTabFile.get(jl));
                    }
                }

                dbgLog.fine("new casewiseRecordForTabFile=" + newDataLine);
                dbgLog.fine("new casewiseRecordForTabFile(size)=" + newDataLine.size());

                casewiseRecordForTabFile = newDataLine;

            } // end-if: stringContinuousVar-exist case

            caseIndex++;
            dbgLog.finer("caseIndex=" + caseIndex);
            for (int k = 0; k < casewiseRecordForTabFile.size(); k++) {

                if (variableTypelList.get(k) > 0) {

                    // See my comments for this padding removal logic
                    // in the "compressed" method -- L.A.

                    String paddRemoved = StringUtils.stripEnd(casewiseRecordForTabFile.get(k).toString(), null);
                    // TODO: clean this up.  For now, just make sure that strings contain at least one blank space.
                    if (paddRemoved.equals("")) {
                        paddRemoved = " ";
                    }

                    //casewiseRecordForTabFile.set(k, "\"" + paddRemoved.replaceAll("\"", Matcher.quoteReplacement("\\\"")) + "\"");
                    casewiseRecordForTabFile.set(k, escapeCharacterString(paddRemoved));

                    // end of String var case

                } // end of variable-type check

                if (casewiseRecordForTabFile.get(k) != null
                        && !casewiseRecordForTabFile.get(k).equals(MissingValueForTextDataFileNumeric)) {

                    // to do date conversion
                    String variableFormatType = variableFormatTypeList[k];
                    dbgLog.finer("k=" + k + "th variable format=" + variableFormatType);

                    int formatDecimalPointPosition = formatDecimalPointPositionList.get(k);

                    if (variableFormatType.equals("date")) {
                        dbgLog.finer("date case");

                        long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString()) * 1000L
                                - SPSS_DATE_OFFSET;

                        String newDatum = sdf_ymd.format(new Date(dateDatum));
                        dbgLog.finer("k=" + k + ":" + newDatum);

                        casewiseRecordForTabFile.set(k, newDatum);
                        dateFormatList[k] = sdf_ymd.toPattern();
                    } else if (variableFormatType.equals("time")) {
                        dbgLog.finer("time case:DTIME or DATETIME or TIME");
                        //formatCategoryTable.put(variableNameList.get(k), "time");
                        // not treating DTIME as date/time; see comment elsewhere in 
                        // the code; 
                        // (but we do need to remember to treat the resulting values 
                        // as character strings, not numerics!)

                        if (printFormatTable.get(variableNameList.get(k)).equals("DTIME")) {

                            if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString())
                                        * 1000L - SPSS_DATE_BIAS;
                                String newDatum = sdf_dhms.format(new Date(dateDatum));
                                // Note: DTIME is not a complete date, so we don't save a date format with it
                                dbgLog.finer("k=" + k + ":" + newDatum);
                                casewiseRecordForTabFile.set(k, newDatum);
                            } else {
                                // decimal point included
                                String[] timeData = casewiseRecordForTabFile.get(k).toString().split("\\.");

                                dbgLog.finer(StringUtils.join(timeData, "|"));
                                long dateDatum = Long.parseLong(timeData[0]) * 1000L - SPSS_DATE_BIAS;
                                StringBuilder sb_time = new StringBuilder(sdf_dhms.format(new Date(dateDatum)));

                                if (formatDecimalPointPosition > 0) {
                                    sb_time.append("." + timeData[1].substring(0, formatDecimalPointPosition));
                                }

                                dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                casewiseRecordForTabFile.set(k, sb_time.toString());
                            }
                        } else if (printFormatTable.get(variableNameList.get(k)).equals("DATETIME")) {
                            // TODO: 
                            // (for both datetime and "dateless" time)
                            // keep the longest of the matching formats - i.e., if there are *some*
                            // values in the vector that have thousands of a second, that should be 
                            // part of the saved format!
                            //  -- L.A. Aug. 12 2014 

                            if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString())
                                        * 1000L - SPSS_DATE_OFFSET;
                                String newDatum = sdf_ymdhms.format(new Date(dateDatum));
                                dbgLog.finer("k=" + k + ":" + newDatum);
                                casewiseRecordForTabFile.set(k, newDatum);
                                dateFormatList[k] = sdf_ymdhms.toPattern();
                            } else {
                                // decimal point included
                                String[] timeData = casewiseRecordForTabFile.get(k).toString().split("\\.");

                                //dbgLog.finer(StringUtils.join(timeData, "|"));
                                long dateDatum = Long.parseLong(timeData[0]) * 1000L - SPSS_DATE_OFFSET;
                                StringBuilder sb_time = new StringBuilder(
                                        sdf_ymdhms.format(new Date(dateDatum)));
                                //dbgLog.finer(sb_time.toString());

                                if (formatDecimalPointPosition > 0) {
                                    sb_time.append("." + timeData[1].substring(0, formatDecimalPointPosition));
                                }
                                dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                casewiseRecordForTabFile.set(k, sb_time.toString());
                                // datetime with milliseconds:
                                dateFormatList[k] = sdf_ymdhms.toPattern()
                                        + (formatDecimalPointPosition > 0 ? ".S" : "");
                            }
                        } else if (printFormatTable.get(variableNameList.get(k)).equals("TIME")) {
                            if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString())
                                        * 1000L;
                                String newDatum = sdf_hms.format(new Date(dateDatum));
                                dbgLog.finer("k=" + k + ":" + newDatum);
                                casewiseRecordForTabFile.set(k, newDatum);
                                if (dateFormatList[k] == null) {
                                    dateFormatList[k] = sdf_hms.toPattern();
                                }
                            } else {
                                // decimal point included
                                String[] timeData = casewiseRecordForTabFile.get(k).toString().split("\\.");

                                //dbgLog.finer(StringUtils.join(timeData, "|"));
                                long dateDatum = Long.parseLong(timeData[0]) * 1000L;
                                StringBuilder sb_time = new StringBuilder(sdf_hms.format(new Date(dateDatum)));
                                //dbgLog.finer(sb_time.toString());

                                if (formatDecimalPointPosition > 0) {
                                    sb_time.append("." + timeData[1].substring(0, formatDecimalPointPosition));
                                }
                                dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                casewiseRecordForTabFile.set(k, sb_time.toString());
                                // time, possibly with milliseconds:
                                String format_hmsS = sdf_hms.toPattern()
                                        + (formatDecimalPointPosition > 0 ? ".S" : "");
                                if (dateFormatList[k] == null
                                        || (format_hmsS.length() > dateFormatList[k].length())) {
                                    dateFormatList[k] = format_hmsS;
                                }
                            }
                        }
                    } else if (variableFormatType.equals("other")) {
                        dbgLog.finer("other non-date/time case");

                        if (printFormatTable.get(variableNameList.get(k)).equals("WKDAY")) {
                            // day of week
                            dbgLog.finer("data k=" + k + ":" + casewiseRecordForTabFile.get(k));
                            dbgLog.finer("data k=" + k + ":" + SPSSConstants.WEEKDAY_LIST
                                    .get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1));
                            String newDatum = SPSSConstants.WEEKDAY_LIST
                                    .get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1);
                            casewiseRecordForTabFile.set(k, newDatum);
                            dbgLog.finer("wkday:k=" + k + ":" + casewiseRecordForTabFile.get(k));
                        } else if (printFormatTable.get(variableNameList.get(k)).equals("MONTH")) {
                            // month
                            dbgLog.finer("data k=" + k + ":" + casewiseRecordForTabFile.get(k));
                            dbgLog.finer("data k=" + k + ":" + SPSSConstants.MONTH_LIST
                                    .get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1));
                            String newDatum = SPSSConstants.MONTH_LIST
                                    .get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1);
                            casewiseRecordForTabFile.set(k, newDatum);
                            dbgLog.finer("month:k=" + k + ":" + casewiseRecordForTabFile.get(k));

                        }
                    }
                    // end of date/time block
                } // end: date-time-datum check

            } // end: loop-k(2nd: variablte-wise-check)

            // write to tab file
            if (casewiseRecordForTabFile.size() > 0) {
                pwout.println(StringUtils.join(casewiseRecordForTabFile, "\t"));
            }

            // numeric contents-check
            for (int l = 0; l < casewiseRecordForTabFile.size(); l++) {
                if (variableFormatTypeList[l].equals("date") || variableFormatTypeList[l].equals("time")
                        || printFormatTable.get(variableNameList.get(l)).equals("WKDAY")
                        || printFormatTable.get(variableNameList.get(l)).equals("MONTH")) {

                } else {
                    if (variableTypelList.get(l) <= 0) {
                        if (casewiseRecordForTabFile.get(l).toString().indexOf(".") >= 0) {
                            decimalVariableSet.add(l);
                        }
                    }
                }
            }

            // reset the case-wise working objects
            casewiseRecordForTabFile.clear();

            if (stream.available() == 0) {
                // reached the end of this file
                // do exit-processing

                dbgLog.fine("reached the end of the file at " + ii + "th iteration");

                break;
            } // if eof processing
        } //i-loop: case(row) iteration

        // close the writer
        pwout.close();

    } catch (IOException ex) {
        throw ex;
    }

    // contents check
    dbgLog.fine("numberOfDecimalVariables=" + numberOfDecimalVariables);
    dbgLog.fine("decimalVariableSet=" + decimalVariableSet);

    dbgLog.fine("***** decodeRecordTypeDataUnCompressed(): end *****");
}

From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.sav.SAVFileReader.java

/**
 * Decodes the uncompressed data section of an SPSS SAV stream, one case (row) at a time.
 *
 * <p>For every case this method reads {@code OBSUnitsPerCase} blocks of
 * {@code LENGTH_SAV_OBS_BLOCK} bytes, converts each block into either a formatted
 * number or a string, merges string-continuation blocks into the string variable
 * they belong to, converts date/time/weekday/month values into their textual form,
 * writes the case as one tab-delimited line to the writer returned by
 * {@code createOutputWriter(stream)}, and stores the per-case values and date
 * formats in {@code dataTable2} and {@code dateFormats}.
 *
 * <p>Besides those two tables, the method (re)initializes {@code variableFormatTypeList}
 * and {@code variableTypeFinal}, adds to {@code decimalVariableSet}, and records the
 * case quantity, decimal variables and format categories on {@code smd}.
 *
 * <p>End of input is detected from the result of {@code read()} rather than from
 * {@code available()}, which is only an estimate of the bytes readable without
 * blocking. The output writer is closed even when decoding fails.
 *
 * @param stream the input positioned at the first byte of the data section; must not be null
 * @throws IllegalArgumentException if {@code stream} is null
 * @throws IOException if reading fails or the stream ends in the middle of a case
 */
void decodeRecordTypeDataUnCompressed(BufferedInputStream stream) throws IOException {
    dbgLog.fine("***** decodeRecordTypeDataUnCompressed(): start *****");

    if (stream == null) {
        throw new IllegalArgumentException("decodeRecordTypeDataUnCompressed: stream == null!");
    }

    Map<String, String> formatCategoryTable = new LinkedHashMap<String, String>();

    // set-up tab file
    PrintWriter pwout = createOutputWriter(stream);

    boolean hasStringVarContinuousBlock = obsNonVariableBlockSet.size() > 0;
    dbgLog.fine("hasStringVarContinuousBlock=" + hasStringVarContinuousBlock);

    int OBS = LENGTH_SAV_OBS_BLOCK;
    int nOBS = OBSUnitsPerCase;

    dbgLog.fine("OBSUnitsPerCase=" + OBSUnitsPerCase);

    int caseIndex = 0;

    dbgLog.fine("printFormatTable:\n" + printFormatTable);

    dbgLog.fine("printFormatNameTable:\n" + printFormatNameTable);
    variableFormatTypeList = new String[varQnty];

    for (int i = 0; i < varQnty; i++) {
        variableFormatTypeList[i] = SPSSConstants.FORMAT_CATEGORY_TABLE
                .get(printFormatTable.get(variableNameList.get(i)));
        dbgLog.fine("i=" + i + "th variableFormatTypeList=" + variableFormatTypeList[i]);
        formatCategoryTable.put(variableNameList.get(i), variableFormatTypeList[i]);
    }
    dbgLog.fine("variableFormatType:\n" + Arrays.deepToString(variableFormatTypeList));
    dbgLog.fine("formatCategoryTable:\n" + formatCategoryTable);

    // contents (variable) checker concerning decimals:
    // 0 = undetermined/integer so far, 1 = has decimals, -1 = string/date/time/other
    variableTypeFinal = new int[varQnty];
    Arrays.fill(variableTypeFinal, 0);

    // NOTE(review): this counter is never updated below, so the final log line
    // always reports 0; decimalVariableSet carries the real information.
    int numberOfDecimalVariables = 0;

    List<String> casewiseRecordForTabFile = new ArrayList<String>();
    String[] caseWiseDateFormatForUNF = null;
    List<String> casewiseRecordForUNF = new ArrayList<String>();

    // missing values are written to the tab-delimited file by
    // using the default or user-specified missing-value strings;
    // however, to calculate UNF/summary statistics,
    // classes for these calculations require their specific
    // missing values that differ from the above missing-value
    // strings; therefore, after row data for the tab-delimited
    // file are written, missing values in a row are changed to
    // UNF/summary-statistics-OK ones.

    // data-storage object for sumStat
    dataTable2 = new Object[varQnty][caseQnty];
    // storage of date formats to pass to UNF
    dateFormats = new String[varQnty][caseQnty];

    // one case occupies nOBS blocks of OBS bytes; the buffer is fully
    // overwritten for every case, so it can be allocated once
    final int caseLength = OBS * nOBS;
    byte[] buffer = new byte[caseLength];

    try {
        for (int i = 0;; i++) { // case-wise loop

            // read exactly one case; a single read() call may return fewer bytes
            int bytesRead = 0;
            while (bytesRead < caseLength) {
                int n = stream.read(buffer, bytesRead, caseLength - bytesRead);
                if (n < 0) {
                    break;
                }
                bytesRead += n;
            }

            if (bytesRead == 0) {
                // reached the end of this file
                dbgLog.fine("***** reached the end of the file at " + i + "th iteration *****");
                break;
            }

            if (bytesRead < caseLength) {
                throw new IOException("decodeRecordTypeDataUnCompressed: unexpected end of file: read "
                        + bytesRead + " of " + caseLength + " bytes of case " + (caseIndex + 1));
            }

            for (int k = 0; k < nOBS; k++) {
                int offset = OBS * k;

                // uncompressed case
                // numeric missing value == sysmis
                // FF FF FF FF FF FF eF FF(little endian)
                // string missing value
                // 20 20 20 20 20 20 20 20
                // cf: compressed case
                // numeric type:sysmis == 0xFF
                // string type: missing value == 0xFE

                boolean isNumeric = OBSwiseTypelList.get(k) == 0;

                if (isNumeric) {
                    dbgLog.finer(k + "-th variable is numeric");
                    // interpret as double
                    ByteBuffer bb_double = ByteBuffer.wrap(buffer, offset, LENGTH_SAV_OBS_BLOCK);
                    if (isLittleEndian) {
                        bb_double.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    // hex dump of the raw block, used to recognize the sysmis bit pattern
                    String dphex = new String(Hex.encodeHex(
                            Arrays.copyOfRange(bb_double.array(), offset, offset + LENGTH_SAV_OBS_BLOCK)));
                    dbgLog.finer("dphex=" + dphex);

                    if ((dphex.equals("ffffffffffffefff")) || (dphex.equals("ffefffffffffffff"))) {
                        // add the numeric missing value
                        dbgLog.fine("SAV Reader: adding: Missing Value (numeric)");
                        casewiseRecordForTabFile.add(MissingValueForTextDataFileNumeric);
                    } else {
                        Double ddatum = bb_double.getDouble();
                        dbgLog.fine("SAV Reader: adding: ddatum=" + ddatum);

                        // add this non-missing-value numeric datum
                        casewiseRecordForTabFile.add(doubleNumberFormatter.format(ddatum));
                    }

                } else {
                    dbgLog.finer(k + "-th variable is string");
                    // string case
                    // strip space-padding
                    // do not trim: string might have spaces within it
                    // the missing value (hex) for a string variable is:
                    // "20 20 20 20 20 20 20 20"

                    String strdatum = new String(
                            Arrays.copyOfRange(buffer, offset, (offset + LENGTH_SAV_OBS_BLOCK)),
                            defaultCharSet);
                    dbgLog.finer("str_datum=" + strdatum);
                    // add this non-missing-value string datum
                    casewiseRecordForTabFile.add(strdatum);

                } // if isNumeric

            } // k-loop

            // String-variable's continuous block exists:
            if (hasStringVarContinuousBlock) {
                // continuous blocks: string case
                // concatenating process

                StringBuilder sb = new StringBuilder("");
                int firstPosition = 0;

                // indices of continuation blocks that have been merged into
                // their leading block and must be dropped from the record
                Set<Integer> removeJset = new HashSet<Integer>();
                for (int j = 0; j < nOBS; j++) {
                    dbgLog.finer("j=" + j + "-th type =" + OBSwiseTypelList.get(j));
                    if (OBSwiseTypelList.get(j) == -1) {
                        // String continuation found at j-th unit;
                        // look back to j-1 for the leading block
                        firstPosition = j - 1;
                        int lastJ = j;
                        String concatanated = null;

                        removeJset.add(j);
                        sb.append(casewiseRecordForTabFile.get(j - 1));
                        sb.append(casewiseRecordForTabFile.get(j));
                        for (int jc = 1;; jc++) {
                            // stop at the first non-continuation unit, or at the end of
                            // the case when the last variable is a multi-block string
                            if ((j + jc == nOBS) || (OBSwiseTypelList.get(j + jc) != -1)) {
                                // j + jc - 1 is the end unit of this string variable
                                concatanated = sb.toString();
                                sb.setLength(0);
                                lastJ = j + jc;
                                break;
                            } else {
                                sb.append(casewiseRecordForTabFile.get(j + jc));
                                removeJset.add(j + jc);
                            }
                        }
                        casewiseRecordForTabFile.set(j - 1, concatanated);

                        // resume scanning right after the merged run
                        j = lastJ - 1;

                    } // end-of-if: continuous-OBS only
                } // end of loop-j

                List<String> newDataLine = new ArrayList<String>();

                for (int jl = 0; jl < casewiseRecordForTabFile.size(); jl++) {
                    if (!removeJset.contains(jl)) {
                        newDataLine.add(casewiseRecordForTabFile.get(jl));
                    }
                }

                dbgLog.fine("new casewiseRecordForTabFile=" + newDataLine);
                dbgLog.fine("new casewiseRecordForTabFile(size)=" + newDataLine.size());

                casewiseRecordForTabFile = newDataLine;

            } // end-if: stringContinuousVar-exist case

            for (int el = 0; el < casewiseRecordForTabFile.size(); el++) {
                casewiseRecordForUNF.add(casewiseRecordForTabFile.get(el));
            }

            caseWiseDateFormatForUNF = new String[casewiseRecordForTabFile.size()];

            caseIndex++;
            dbgLog.finer("caseIndex=" + caseIndex);
            for (int k = 0; k < casewiseRecordForTabFile.size(); k++) {

                if (variableTypelList.get(k) > 0) {
                    // String variable case: set to -1
                    variableTypeFinal[k] = -1;

                    // trailing padding is removed; inner whitespace is preserved
                    String paddRemoved = StringUtils.stripEnd(casewiseRecordForTabFile.get(k).toString(), null);
                    // TODO: clean this up.  For now, just make sure that strings contain at least one blank space.
                    if (paddRemoved.equals("")) {
                        paddRemoved = " ";
                    }

                    casewiseRecordForUNF.set(k, paddRemoved);
                    // the tab file gets the value double-quoted, with embedded quotes escaped
                    casewiseRecordForTabFile.set(k,
                            "\"" + paddRemoved.replaceAll("\"", Matcher.quoteReplacement("\\\"")) + "\"");

                    // end of String var case

                } else {
                    // numeric var case
                    if (casewiseRecordForTabFile.get(k).equals(MissingValueForTextDataFileNumeric)) {
                        casewiseRecordForUNF.set(k, null);
                    }

                } // end of variable-type check

                if (casewiseRecordForTabFile.get(k) != null
                        && !casewiseRecordForTabFile.get(k).equals(MissingValueForTextDataFileNumeric)) {

                    // to do date conversion
                    String variableFormatType = variableFormatTypeList[k];
                    dbgLog.finer("k=" + k + "th variable format=" + variableFormatType);

                    int formatDecimalPointPosition = formatDecimalPointPositionList.get(k);

                    if (variableFormatType.equals("date")) {
                        dbgLog.finer("date case");

                        // stored value is in seconds; convert to milliseconds and shift by the SPSS offset
                        long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString()) * 1000L
                                - SPSS_DATE_OFFSET;

                        String newDatum = sdf_ymd.format(new Date(dateDatum));
                        dbgLog.finer("k=" + k + ":" + newDatum);
                        caseWiseDateFormatForUNF[k] = sdf_ymd.toPattern();

                        casewiseRecordForTabFile.set(k, newDatum);
                        casewiseRecordForUNF.set(k, newDatum);
                    } else if (variableFormatType.equals("time")) {
                        dbgLog.finer("time case:DTIME or DATETIME or TIME");

                        if (printFormatTable.get(variableNameList.get(k)).equals("DTIME")) {

                            if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString())
                                        * 1000L - SPSS_DATE_BIAS;
                                String newDatum = sdf_dhms.format(new Date(dateDatum));
                                // Note: DTIME is not a complete date, so we don't save a date format with it
                                dbgLog.finer("k=" + k + ":" + newDatum);
                                casewiseRecordForTabFile.set(k, newDatum);
                                casewiseRecordForUNF.set(k, newDatum);
                            } else {
                                // decimal point included
                                String[] timeData = casewiseRecordForTabFile.get(k).toString().split("\\.");

                                dbgLog.finer(StringUtils.join(timeData, "|"));
                                long dateDatum = Long.parseLong(timeData[0]) * 1000L - SPSS_DATE_BIAS;
                                StringBuilder sb_time = new StringBuilder(sdf_dhms.format(new Date(dateDatum)));

                                if (formatDecimalPointPosition > 0) {
                                    sb_time.append("." + timeData[1].substring(0, formatDecimalPointPosition));
                                }

                                dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                casewiseRecordForTabFile.set(k, sb_time.toString());
                                casewiseRecordForUNF.set(k, sb_time.toString());
                            }
                        } else if (printFormatTable.get(variableNameList.get(k)).equals("DATETIME")) {

                            if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString())
                                        * 1000L - SPSS_DATE_OFFSET;
                                String newDatum = sdf_ymdhms.format(new Date(dateDatum));
                                caseWiseDateFormatForUNF[k] = sdf_ymdhms.toPattern();
                                dbgLog.finer("k=" + k + ":" + newDatum);
                                casewiseRecordForTabFile.set(k, newDatum);
                                casewiseRecordForUNF.set(k, newDatum);
                            } else {
                                // decimal point included
                                String[] timeData = casewiseRecordForTabFile.get(k).toString().split("\\.");

                                long dateDatum = Long.parseLong(timeData[0]) * 1000L - SPSS_DATE_OFFSET;
                                StringBuilder sb_time = new StringBuilder(
                                        sdf_ymdhms.format(new Date(dateDatum)));

                                if (formatDecimalPointPosition > 0) {
                                    sb_time.append("." + timeData[1].substring(0, formatDecimalPointPosition));
                                }
                                // datetime, possibly with fractional seconds
                                caseWiseDateFormatForUNF[k] = sdf_ymdhms.toPattern()
                                        + (formatDecimalPointPosition > 0 ? ".S" : "");
                                dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                casewiseRecordForTabFile.set(k, sb_time.toString());
                                casewiseRecordForUNF.set(k, sb_time.toString());
                            }
                        } else if (printFormatTable.get(variableNameList.get(k)).equals("TIME")) {
                            if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString())
                                        * 1000L;
                                String newDatum = sdf_hms.format(new Date(dateDatum));
                                caseWiseDateFormatForUNF[k] = sdf_hms.toPattern();
                                dbgLog.finer("k=" + k + ":" + newDatum);
                                casewiseRecordForTabFile.set(k, newDatum);
                                casewiseRecordForUNF.set(k, newDatum);
                            } else {
                                // decimal point included
                                String[] timeData = casewiseRecordForTabFile.get(k).toString().split("\\.");

                                long dateDatum = Long.parseLong(timeData[0]) * 1000L;
                                StringBuilder sb_time = new StringBuilder(sdf_hms.format(new Date(dateDatum)));

                                if (formatDecimalPointPosition > 0) {
                                    sb_time.append("." + timeData[1].substring(0, formatDecimalPointPosition));
                                }
                                // time, possibly with fractional seconds
                                caseWiseDateFormatForUNF[k] = this.sdf_hms.toPattern()
                                        + (formatDecimalPointPosition > 0 ? ".S" : "");
                                dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                casewiseRecordForTabFile.set(k, sb_time.toString());
                                casewiseRecordForUNF.set(k, sb_time.toString());
                            }
                        }
                    } else if (variableFormatType.equals("other")) {
                        dbgLog.finer("other non-date/time case");

                        if (printFormatTable.get(variableNameList.get(k)).equals("WKDAY")) {
                            // day of week: the stored value is a 1-based index into WEEKDAY_LIST
                            dbgLog.finer("data k=" + k + ":" + casewiseRecordForTabFile.get(k));
                            dbgLog.finer("data k=" + k + ":" + SPSSConstants.WEEKDAY_LIST
                                    .get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1));
                            String newDatum = SPSSConstants.WEEKDAY_LIST
                                    .get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1);
                            casewiseRecordForTabFile.set(k, newDatum);
                            casewiseRecordForUNF.set(k, newDatum);
                            dbgLog.finer("wkday:k=" + k + ":" + casewiseRecordForTabFile.get(k));
                        } else if (printFormatTable.get(variableNameList.get(k)).equals("MONTH")) {
                            // month: the stored value is a 1-based index into MONTH_LIST
                            dbgLog.finer("data k=" + k + ":" + casewiseRecordForTabFile.get(k));
                            dbgLog.finer("data k=" + k + ":" + SPSSConstants.MONTH_LIST
                                    .get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1));
                            String newDatum = SPSSConstants.MONTH_LIST
                                    .get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1);
                            casewiseRecordForTabFile.set(k, newDatum);
                            casewiseRecordForUNF.set(k, newDatum);
                            dbgLog.finer("month:k=" + k + ":" + casewiseRecordForTabFile.get(k));

                        }
                    }
                    // end of date/time block
                } // end: date-time-datum check

            } // end: loop-k(2nd: variable-wise-check)

            // write to tab file
            if (casewiseRecordForTabFile.size() > 0) {
                pwout.println(StringUtils.join(casewiseRecordForTabFile, "\t"));
            }

            // keep the UNF-ready values and date formats of this case
            if (casewiseRecordForTabFile.size() > 0) {
                for (int ij = 0; ij < varQnty; ij++) {
                    dataTable2[ij][caseIndex - 1] = casewiseRecordForUNF.get(ij);
                    if (variableFormatTypeList[ij].equals("date")
                            || variableFormatTypeList[ij].equals("time")) {
                        this.dateFormats[ij][caseIndex - 1] = caseWiseDateFormatForUNF[ij];
                    }
                }
            }

            // numeric contents-check
            for (int l = 0; l < casewiseRecordForTabFile.size(); l++) {
                if (variableFormatTypeList[l].equals("date") || variableFormatTypeList[l].equals("time")
                        || printFormatTable.get(variableNameList.get(l)).equals("WKDAY")
                        || printFormatTable.get(variableNameList.get(l)).equals("MONTH")) {
                    variableTypeFinal[l] = -1;
                }

                if (variableTypeFinal[l] == 0) {
                    if (casewiseRecordForTabFile.get(l).toString().indexOf(".") >= 0) {
                        // l-th variable is not integer
                        variableTypeFinal[l] = 1;
                        decimalVariableSet.add(l);
                    }
                }
            }

            // reset the case-wise working objects
            casewiseRecordForTabFile.clear();
            casewiseRecordForUNF.clear();

        } //i-loop: case(row) iteration

    } finally {
        // close the writer, also when decoding fails part-way through
        pwout.close();
    }

    smd.getFileInformation().put("caseQnty", caseQnty);
    smd.setDecimalVariables(decimalVariableSet);
    smd.setVariableFormatCategory(formatCategoryTable);

    // contents check
    dbgLog.fine("variableType=" + ArrayUtils.toString(variableTypeFinal));
    dbgLog.fine("numberOfDecimalVariables=" + numberOfDecimalVariables);
    dbgLog.fine("decimalVariableSet=" + decimalVariableSet);

    dbgLog.fine("***** decodeRecordTypeDataUnCompressed(): end *****");
}

From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SAVFileReader.java

/**
 * Decodes the data section of a compressed SAV file and writes it out as a
 * tab-delimited text file, one line per case (row).
 *
 * <p>The stream is consumed in blocks of {@code LENGTH_SAV_OBS_BLOCK} flag bytes.
 * Each flag byte describes one OBS unit:
 * <ul>
 *   <li>{@code 0}: nothing to add;</li>
 *   <li>{@code 1..251}: a compressed numeric value equal to {@code code - 100};</li>
 *   <li>{@code 252}: end-of-file mark;</li>
 *   <li>{@code 253}: an uncompressed block of {@code LENGTH_SAV_OBS_BLOCK} bytes
 *       follows in the stream (string segment or double);</li>
 *   <li>{@code 254}: string missing value (a single blank is recorded);</li>
 *   <li>{@code 255}: numeric system-missing value.</li>
 * </ul>
 * Every time {@code OBSUnitsPerCase} units have been seen, the collected values are
 * post-processed (string segments joined, trailing padding stripped, date/time/WKDAY/MONTH
 * values reformatted) and written to the output writer.
 *
 * <p>Side effects visible in this method: {@code dateFormatList},
 * {@code variableFormatTypeList}, {@code formatCategoryTable} and
 * {@code decimalVariableSet} are (re)populated.
 *
 * @param stream the input positioned at the start of the compressed data section
 * @throws IllegalArgumentException if {@code stream} is {@code null}
 * @throws IOException if reading fails, if an uncompressed datum is cut short by the end
 *         of the stream, or if an OBS unit has an unexpected type code
 */
void decodeRecordTypeDataCompressed(BufferedInputStream stream) throws IOException {

    dbgLog.fine("***** decodeRecordTypeDataCompressed(): start *****");

    if (stream == null) {
        throw new IllegalArgumentException("decodeRecordTypeDataCompressed: stream == null!");
    }

    PrintWriter pwout = createOutputWriter(stream);

    // Everything after the writer has been created runs inside try/finally so
    // that the writer is closed on every exit path, including exceptions.
    try {
        int varQnty = dataTable.getVarQuantity().intValue();
        int caseQnty = dataTable.getCaseQuantity().intValue();

        dbgLog.fine("varQnty: " + varQnty);

        dateFormatList = new String[varQnty];

        boolean hasStringVarContinuousBlock = obsNonVariableBlockSet.size() > 0;
        dbgLog.fine("hasStringVarContinuousBlock=" + hasStringVarContinuousBlock);

        int ii = 0;

        int OBS = LENGTH_SAV_OBS_BLOCK;
        int nOBS = OBSUnitsPerCase;

        dbgLog.fine("OBSUnitsPerCase=" + OBSUnitsPerCase);

        int caseIndex = 0;

        dbgLog.fine("printFormatTable:\n" + printFormatTable);
        variableFormatTypeList = new String[varQnty];

        for (int i = 0; i < varQnty; i++) {
            variableFormatTypeList[i] = SPSSConstants.FORMAT_CATEGORY_TABLE
                    .get(printFormatTable.get(variableNameList.get(i)));
            dbgLog.fine("i=" + i + "th variableFormatTypeList=" + variableFormatTypeList[i]);
            formatCategoryTable.put(variableNameList.get(i), variableFormatTypeList[i]);
        }
        dbgLog.fine("variableFormatType:\n" + Arrays.deepToString(variableFormatTypeList));
        dbgLog.fine("formatCategoryTable:\n" + formatCategoryTable);

        // TODO:
        // Make sure the date formats are actually preserved!
        // (this is something that was collected in the code below and passed
        // to the UNF calculator).
        // -- L.A. 4.0 alpha
        List<String> casewiseRecordForTabFile = new ArrayList<String>();

        // this compression is applied only to non-float data, i.e. integer;
        // 8-byte float datum is kept intact
        boolean hasReachedEOF = false;

        OBSERVATION: while (true) {

            dbgLog.fine("SAV Reader: compressed: ii=" + ii + "-th iteration");

            byte[] octate = new byte[LENGTH_SAV_OBS_BLOCK];

            // A single read() may legally return fewer bytes than requested,
            // so the block is filled with a loop. Bytes that could not be read
            // stay 0, which is the "do nothing" flag.
            int nbytes = readFullBlock(stream, octate);
            if (nbytes == 0) {
                // Nothing left in the stream: this is the real end of the data.
                // (available() == 0 is only an estimate and is not used as an
                // end-of-file test.)
                dbgLog.fine("***** reached the end of the file at " + ii + "th iteration *****");
                break OBSERVATION;
            }

            // process a block of compression flags; each flag stands for one
            // OBS unit of data

            for (int i = 0; i < LENGTH_SAV_OBS_BLOCK; i++) {

                dbgLog.finer("i=" + i + "-th iteration");
                // interpret the flag byte as an unsigned value (0-255)
                int byteCode = octate[i] & 0xFF;

                switch (byteCode) {
                case 252:
                    // end of the file
                    dbgLog.fine("SAV Reader: compressed: end of file mark [FC] was found");
                    hasReachedEOF = true;
                    break;
                case 253:
                    // FD: uncompressed data follows after this octate
                    // long string datum or float datum
                    // read the following octate
                    byte[] uncompressedByte = new byte[LENGTH_SAV_OBS_BLOCK];
                    int ucbytes = readFullBlock(stream, uncompressedByte);
                    if (ucbytes < LENGTH_SAV_OBS_BLOCK) {
                        // decoding a partially-read datum would silently produce garbage
                        throw new IOException("unexpected end of file: only " + ucbytes + " of "
                                + LENGTH_SAV_OBS_BLOCK + " bytes of an uncompressed datum could be read");
                    }
                    int typeIndex = (ii * OBS + i) % nOBS;

                    if ((OBSwiseTypelList.get(typeIndex) > 0) || (OBSwiseTypelList.get(typeIndex) == -1)) {
                        // code= >0 |-1: string or its contiguous block
                        // decode as a string object
                        String strdatum = new String(
                                Arrays.copyOfRange(uncompressedByte, 0, LENGTH_SAV_OBS_BLOCK), defaultCharSet);
                        // add this non-missing-value string datum
                        casewiseRecordForTabFile.add(strdatum);
                    } else if (OBSwiseTypelList.get(typeIndex) == -2) {
                        // code= -2: the last byte of the block is not part of the string
                        String strdatum = new String(
                                Arrays.copyOfRange(uncompressedByte, 0, LENGTH_SAV_OBS_BLOCK - 1),
                                defaultCharSet);
                        casewiseRecordForTabFile.add(strdatum);
                    } else if (OBSwiseTypelList.get(typeIndex) == 0) {
                        // code= 0: numeric

                        ByteBuffer bb_double = ByteBuffer.wrap(uncompressedByte, 0, LENGTH_SAV_OBS_BLOCK);
                        if (isLittleEndian) {
                            bb_double.order(ByteOrder.LITTLE_ENDIAN);
                        }

                        Double ddatum = bb_double.getDouble();
                        // add this non-missing-value numeric datum
                        casewiseRecordForTabFile.add(doubleNumberFormatter.format(ddatum));
                        dbgLog.fine("SAV Reader: compressed: added value to dataLine: " + ddatum);

                    } else {
                        dbgLog.fine("SAV Reader: out-of-range exception");
                        throw new IOException("out-of-range value was found");
                    }

                    break;
                case 254:
                    // FE: used as the missing value for string variables
                    // an empty case in a string variable also takes this value
                    // string variable does not accept space-only data
                    // cf: uncompressed case
                    // 20 20 20 20 20 20 20 20
                    // add the string missing value

                    casewiseRecordForTabFile.add(" "); // add "." here?

                    // Note that technically this byte flag (254/xFE) means
                    // that *eight* white space characters should be
                    // written to the output stream. This caused me
                    // a great amount of confusion, because it appeared
                    // to me that there was a mismatch between the number
                    // of bytes advertised in the variable metadata and
                    // the number of bytes actually found in the data
                    // section of a compressed SAV file; this is because
                    // these 8 bytes "come out of nowhere"; they are not
                    // written in the data section, but this flag specifies
                    // that they should be added to the output.
                    // Also, as I pointed out above, we are only writing
                    // out one whitespace character, not 8 as instructed.
                    // This appears to be legit; these blocks of 8 spaces
                    // seem to be only used for padding, and all such
                    // multiple padding spaces are stripped anyway during
                    // the post-processing.

                    break;
                case 255:
                    // FF: system missing value for numeric variables
                    // cf: uncompressed case (sysmis)
                    // FF FF FF FF FF FF eF FF(little endian)
                    // add the numeric missing value
                    dbgLog.fine("SAV Reader: compressed: Missing Value, numeric");
                    casewiseRecordForTabFile.add(MissingValueForTextDataFileNumeric);

                    break;
                case 0:
                    // 00: do nothing
                    dbgLog.fine("SAV Reader: compressed: doing nothing (zero); ");

                    break;
                default:
                    if ((byteCode > 0) && (byteCode < 252)) {
                        // datum is compressed: the value is the flag minus the bias of 100
                        // add this uncompressed numeric datum
                        Double unCompressed = Double.valueOf(byteCode - 100);
                        dbgLog.fine("SAV Reader: compressed: default case: " + unCompressed);

                        casewiseRecordForTabFile.add(doubleNumberFormatter.format(unCompressed));
                    }
                } // end of switch

                // The-end-of-a-case(row)-processing

                // this line that follows, and the code around it
                // is really confusing:
                int varCounter = (ii * OBS + i + 1) % nOBS;
                // while both OBS and LENGTH_SAV_OBS_BLOCK = 8
                // (OBS was initialized as OBS=LENGTH_SAV_OBS_BLOCK),
                // the 2 values mean different things:
                // LENGTH_SAV_OBS_BLOCK is the number of bytes in one OBS;
                // and OBS is the number of OBS blocks that we process
                // at a time. I.e., we process 8 chunks of 8 bytes at a time.
                // This is how data is organized inside an SAV file:
                // 8 bytes of compression flags, followed by 8x8 or fewer
                // (depending on the flags) bytes of compressed data.
                // I should rename this OBS variable something more
                // meaningful.
                //
                // Also, the "varCounter" variable name is entirely
                // misleading -- it counts not variables, but OBS blocks.

                dbgLog.fine("SAV Reader: compressed: OBS counter=" + varCounter + "(ii=" + ii + ")");

                if (varCounter == 0) {

                    // all OBS units of one case have been seen

                    // check whether a string-variable's continuous block exists
                    // if so, they must be joined

                    if (hasStringVarContinuousBlock) {

                        // string-variable's continuous-block-concatenating-processing

                        StringBuilder sb = new StringBuilder("");

                        // positions of continuation segments that were merged
                        // into the preceding entry and must be dropped
                        Set<Integer> removeJset = new HashSet<Integer>();
                        for (int j = 0; j < nOBS; j++) {
                            dbgLog.fine("RTD: j=" + j + "-th type =" + OBSwiseTypelList.get(j));
                            if ((OBSwiseTypelList.get(j) == -1) || (OBSwiseTypelList.get(j) == -2)) {
                                // Continued String variable found at j-th
                                // position. look back the j-1
                                int lastJ = j;
                                String concatenated = null;

                                removeJset.add(j);
                                sb.append(casewiseRecordForTabFile.get(j - 1));
                                sb.append(casewiseRecordForTabFile.get(j));

                                for (int jc = 1;; jc++) {
                                    if ((j + jc == nOBS) || ((OBSwiseTypelList.get(j + jc) != -1)
                                            && (OBSwiseTypelList.get(j + jc) != -2))) {

                                        // j + jc - 1 is the end unit of this string variable
                                        concatenated = sb.toString();
                                        sb.setLength(0);
                                        lastJ = j + jc;
                                        break;
                                    } else {
                                        sb.append(casewiseRecordForTabFile.get(j + jc));
                                        removeJset.add(j + jc);
                                    }
                                }
                                casewiseRecordForTabFile.set(j - 1, concatenated);

                                // resume the scan right after the merged run
                                // (the loop's j++ brings j to lastJ)
                                j = lastJ - 1;

                            } // end-of-if: continuous-OBS only

                        } // end of loop-j

                        // a new list that stores a new case with concatenated string data
                        List<String> newDataLine = new ArrayList<String>();

                        for (int jl = 0; jl < casewiseRecordForTabFile.size(); jl++) {
                            if (!removeJset.contains(jl)) {
                                newDataLine.add(casewiseRecordForTabFile.get(jl));
                            }
                        } // end of loop-jl

                        casewiseRecordForTabFile = newDataLine;

                    } // end-if: stringContinuousVar-exist case

                    // caseIndex starts from 1 not 0
                    caseIndex = (ii * OBS + i + 1) / nOBS;

                    for (int k = 0; k < casewiseRecordForTabFile.size(); k++) {

                        dbgLog.fine("k=" + k + "-th variableTypelList=" + variableTypelList.get(k));

                        if (variableTypelList.get(k) > 0) {

                            // Strip the String variables off the
                            // whitespace padding:

                            // [ snipped ]

                            // I've removed the block of code above where
                            // String values were substring()-ed to the
                            // length specified in the variable metadata;
                            // Doing that was not enough, since a string
                            // can still be space-padded inside its
                            // advertised capacity. (note that extended
                            // variables can have many kilobytes of such
                            // padding in them!) Plus it was completely
                            // redundant, since we are stripping all the
                            // trailing white spaces with
                            // StringUtils.stripEnd() below:

                            String paddRemoved = StringUtils
                                    .stripEnd(casewiseRecordForTabFile.get(k).toString(), null);
                            // TODO: clean this up.  For now, just make sure that strings contain at least one blank space.
                            if (paddRemoved.equals("")) {
                                paddRemoved = " ";
                            }
                            casewiseRecordForTabFile.set(k, escapeCharacterString(paddRemoved));

                            // end of String var case

                        } // end of variable-type check

                        if (casewiseRecordForTabFile.get(k) != null && !casewiseRecordForTabFile.get(k)
                                .equals(MissingValueForTextDataFileNumeric)) {

                            String variableFormatType = variableFormatTypeList[k];
                            dbgLog.finer("k=" + k + "th printFormatTable format="
                                    + printFormatTable.get(variableNameList.get(k)));

                            int formatDecimalPointPosition = formatDecimalPointPositionList.get(k);

                            if (variableFormatType.equals("date")) {
                                dbgLog.finer("date case");

                                // stored value is in seconds; convert to milliseconds and shift
                                // by SPSS_DATE_OFFSET before handing it to java.util.Date
                                long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString())
                                        * 1000L - SPSS_DATE_OFFSET;

                                String newDatum = sdf_ymd.format(new Date(dateDatum));
                                dbgLog.finer("k=" + k + ":" + newDatum);
                                /* saving date format */
                                dbgLog.finer("saving dateFormat[k] = " + sdf_ymd.toPattern());
                                casewiseRecordForTabFile.set(k, newDatum);
                                dateFormatList[k] = sdf_ymd.toPattern();
                            } else if (variableFormatType.equals("time")) {
                                dbgLog.finer("time case:DTIME or DATETIME or TIME");

                                if (printFormatTable.get(variableNameList.get(k)).equals("DTIME")) {
                                    // We're not even going to try to handle "DTIME"
                                    // values as time/dates in dataverse; this is a weird
                                    // format that nobody uses outside of SPSS.
                                    // (but we do need to remember to treat the resulting values
                                    // as character strings, not numerics!)

                                    if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                        long dateDatum = Long
                                                .parseLong(casewiseRecordForTabFile.get(k).toString()) * 1000L
                                                - SPSS_DATE_BIAS;
                                        String newDatum = sdf_dhms.format(new Date(dateDatum));
                                        dbgLog.finer("k=" + k + ":" + newDatum);
                                        casewiseRecordForTabFile.set(k, newDatum);
                                    } else {
                                        // decimal point included
                                        String[] timeData = casewiseRecordForTabFile.get(k).toString()
                                                .split("\\.");

                                        dbgLog.finer(StringUtils.join(timeData, "|"));
                                        long dateDatum = Long.parseLong(timeData[0]) * 1000L - SPSS_DATE_BIAS;
                                        StringBuilder sb_time = new StringBuilder(
                                                sdf_dhms.format(new Date(dateDatum)));
                                        dbgLog.finer(sb_time.toString());

                                        if (formatDecimalPointPosition > 0) {
                                            // clamp: the fraction may have fewer digits than the format asks for
                                            sb_time.append("." + timeData[1].substring(0,
                                                    Math.min(formatDecimalPointPosition, timeData[1].length())));
                                        }

                                        dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                        casewiseRecordForTabFile.set(k, sb_time.toString());
                                    }
                                } else if (printFormatTable.get(variableNameList.get(k)).equals("DATETIME")) {
                                    // TODO:
                                    // (for both datetime and "dateless" time)
                                    // keep the longest of the matching formats - i.e., if there are *some*
                                    // values in the vector that have thousands of a second, that should be
                                    // part of the saved format!
                                    //  -- L.A. Aug. 12 2014
                                    if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                        long dateDatum = Long
                                                .parseLong(casewiseRecordForTabFile.get(k).toString()) * 1000L
                                                - SPSS_DATE_OFFSET;
                                        String newDatum = sdf_ymdhms.format(new Date(dateDatum));
                                        dbgLog.finer("k=" + k + ":" + newDatum);
                                        casewiseRecordForTabFile.set(k, newDatum);
                                        dateFormatList[k] = sdf_ymdhms.toPattern();
                                    } else {
                                        // decimal point included
                                        String[] timeData = casewiseRecordForTabFile.get(k).toString()
                                                .split("\\.");

                                        long dateDatum = Long.parseLong(timeData[0]) * 1000L - SPSS_DATE_OFFSET;
                                        StringBuilder sb_time = new StringBuilder(
                                                sdf_ymdhms.format(new Date(dateDatum)));

                                        if (formatDecimalPointPosition > 0) {
                                            // clamp: the fraction may have fewer digits than the format asks for
                                            sb_time.append("." + timeData[1].substring(0,
                                                    Math.min(formatDecimalPointPosition, timeData[1].length())));
                                        }
                                        dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                        casewiseRecordForTabFile.set(k, sb_time.toString());
                                        dateFormatList[k] = sdf_ymdhms.toPattern()
                                                + (formatDecimalPointPosition > 0 ? ".S" : "");
                                    }
                                } else if (printFormatTable.get(variableNameList.get(k)).equals("TIME")) {
                                    // TODO:
                                    // double-check that we are handling "dateless" time correctly... -- L.A. Aug. 2014
                                    if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                        long dateDatum = Long
                                                .parseLong(casewiseRecordForTabFile.get(k).toString()) * 1000L;
                                        String newDatum = sdf_hms.format(new Date(dateDatum));
                                        dbgLog.finer("k=" + k + ":" + newDatum);
                                        casewiseRecordForTabFile.set(k, newDatum);
                                        if (dateFormatList[k] == null) {
                                            dateFormatList[k] = sdf_hms.toPattern();
                                        }
                                    } else {
                                        // decimal point included
                                        String[] timeData = casewiseRecordForTabFile.get(k).toString()
                                                .split("\\.");

                                        long dateDatum = Long.parseLong(timeData[0]) * 1000L;
                                        StringBuilder sb_time = new StringBuilder(
                                                sdf_hms.format(new Date(dateDatum)));

                                        if (formatDecimalPointPosition > 0) {
                                            // clamp: the fraction may have fewer digits than the format asks for
                                            sb_time.append("." + timeData[1].substring(0,
                                                    Math.min(formatDecimalPointPosition, timeData[1].length())));
                                        }
                                        dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                        casewiseRecordForTabFile.set(k, sb_time.toString());

                                        // keep the longer of the patterns seen so far for this variable
                                        String format_hmsS = sdf_hms.toPattern()
                                                + (formatDecimalPointPosition > 0 ? ".S" : "");
                                        if (dateFormatList[k] == null
                                                || (format_hmsS.length() > dateFormatList[k].length())) {
                                            dateFormatList[k] = format_hmsS;
                                        }
                                    }
                                }

                            } else if (variableFormatType.equals("other")) {
                                dbgLog.finer("other non-date/time case:=" + i);

                                if (printFormatTable.get(variableNameList.get(k)).equals("WKDAY")) {
                                    // day of week: the stored value is a 1-based index into WEEKDAY_LIST
                                    dbgLog.finer("data k=" + k + ":" + casewiseRecordForTabFile.get(k));
                                    dbgLog.finer("data k=" + k + ":" + SPSSConstants.WEEKDAY_LIST.get(
                                            Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1));
                                    String newDatum = SPSSConstants.WEEKDAY_LIST.get(
                                            Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1);
                                    casewiseRecordForTabFile.set(k, newDatum);
                                    dbgLog.finer("wkday:k=" + k + ":" + casewiseRecordForTabFile.get(k));
                                } else if (printFormatTable.get(variableNameList.get(k)).equals("MONTH")) {
                                    // month: the stored value is a 1-based index into MONTH_LIST
                                    dbgLog.finer("data k=" + k + ":" + casewiseRecordForTabFile.get(k));
                                    dbgLog.finer("data k=" + k + ":" + SPSSConstants.MONTH_LIST.get(
                                            Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1));
                                    String newDatum = SPSSConstants.MONTH_LIST.get(
                                            Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1);
                                    casewiseRecordForTabFile.set(k, newDatum);
                                    dbgLog.finer("month:k=" + k + ":" + casewiseRecordForTabFile.get(k));
                                }
                            }

                        } // end: date-time-datum check

                    } // end: loop-k(2nd: variable-wise-check)

                    // write to tab file
                    if (casewiseRecordForTabFile.size() > 0) {
                        pwout.println(StringUtils.join(casewiseRecordForTabFile, "\t"));
                    }

                    // numeric contents-check
                    for (int l = 0; l < casewiseRecordForTabFile.size(); l++) {
                        if (variableFormatTypeList[l].equals("date") || variableFormatTypeList[l].equals("time")
                                || printFormatTable.get(variableNameList.get(l)).equals("WKDAY")
                                || printFormatTable.get(variableNameList.get(l)).equals("MONTH")) {
                            // TODO:
                            // figure out if any special handling is still needed here in 4.0.
                            // -- L.A. - Aug. 2014

                        } else {
                            if (variableTypelList.get(l) <= 0) {
                                // a non-string value that contains a decimal point
                                // marks its variable as decimal
                                if (casewiseRecordForTabFile.get(l).toString().indexOf(".") >= 0) {
                                    decimalVariableSet.add(l);
                                }
                            }
                        }
                    }

                    // reset the case-wise working objects
                    casewiseRecordForTabFile.clear();

                    // when the case count is known, stop once that many cases were written
                    if (caseQnty > 0) {
                        if (caseIndex == caseQnty) {
                            hasReachedEOF = true;
                        }
                    }

                    if (hasReachedEOF) {
                        break;
                    }

                } // if(The-end-of-a-case(row)-processing)

            } // loop-i (OBS unit)

            if (hasReachedEOF) {
                // reached the end of this file
                // do exit-processing

                dbgLog.fine("***** reached the end of the file at " + ii + "th iteration *****");

                break OBSERVATION;
            }

            ii++;

        } // while loop

    } finally {
        // PrintWriter.close() flushes and never throws
        pwout.close();
    }

    dbgLog.fine("<<<<<<");
    dbgLog.fine("formatCategoryTable = " + formatCategoryTable);
    dbgLog.fine(">>>>>>");

    dbgLog.fine("decimalVariableSet=" + decimalVariableSet);

    dbgLog.fine("decodeRecordTypeDataCompressed(): end");
}

/**
 * Reads from {@code stream} until {@code buffer} is full or the end of the stream
 * is reached, whichever comes first.
 *
 * @param stream the stream to read from
 * @param buffer the destination; positions that could not be filled are left untouched
 * @return the number of bytes stored in {@code buffer}; {@code 0} when the stream
 *         was already at its end
 * @throws IOException if the underlying read fails
 */
private static int readFullBlock(BufferedInputStream stream, byte[] buffer) throws IOException {
    int total = 0;
    while (total < buffer.length) {
        int n = stream.read(buffer, total, buffer.length - total);
        if (n < 0) {
            break;
        }
        total += n;
    }
    return total;
}