edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.rdata.RDATAFileReader.java Source code

Java tutorial

Introduction

Here is the source code for edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.rdata.RDATAFileReader.java

Source

/*
   Copyright (C) 2005-2013, by the President and Fellows of Harvard College.
    
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at
    
     http://www.apache.org/licenses/LICENSE-2.0
    
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
    
   Dataverse Network - A web application to share, preserve and analyze research data.
   Developed at the Institute for Quantitative Social Science, Harvard University.
   Version 3.0.
*/
package edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.rdata;

import java.io.*;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.text.*;
import java.util.logging.*;
import java.util.*;
import java.security.NoSuchAlgorithmException;

import javax.inject.Inject;

// Rosuda Wrappers and Methods for R-calls to Rserve
import org.rosuda.REngine.REXP;
import org.rosuda.REngine.REXPMismatchException;
import org.rosuda.REngine.RList;
import org.rosuda.REngine.Rserve.RFileInputStream;
import org.rosuda.REngine.Rserve.RFileOutputStream;
import org.rosuda.REngine.Rserve.*;

import edu.harvard.iq.dataverse.DataTable;
import edu.harvard.iq.dataverse.datavariable.DataVariable;
import edu.harvard.iq.dataverse.datavariable.VariableCategory;

import edu.harvard.iq.dataverse.ingest.plugin.spi.*;
import edu.harvard.iq.dataverse.ingest.tabulardata.TabularDataFileReader;
import edu.harvard.iq.dataverse.ingest.tabulardata.spi.TabularDataFileReaderSpi;
import edu.harvard.iq.dataverse.ingest.tabulardata.TabularDataIngest;
import edu.harvard.iq.dataverse.rserve.*;
import javax.naming.Context;
import javax.naming.InitialContext;
import javax.naming.NamingException;

import org.apache.commons.lang.RandomStringUtils;
import org.apache.commons.lang.ArrayUtils;

/**
 * Dataverse 4.0 implementation of <code>TabularDataFileReader</code> for the 
 * RData Binary Format.
 * 
 * Based on the original implementation for DVN v3.*, by Matt Owen (2012-2013),
 * completed by Leonid Andreev in 2013. 
 * 
 * This version is a serious re-write of the plugin, using the new 4.0 
 * ingest plugin architecture. 
 * 
 * original 
 * @author Matthew Owen
 * @author Leonid Andreev
     
 * This implementation uses external R-Scripts to do the bulk of the processing.
 */
public class RDATAFileReader extends TabularDataFileReader {

    // Names of the date/time format categories.
    public static final String[] FORMATS = { "other", "date", "date-time", "date-time-timezone" };

    // Format names, file extensions and MIME type associated with RData files.
    private static final String[] FORMAT_NAMES = { "RDATA", "Rdata", "rdata" };
    private static final String[] EXTENSIONS = { "Rdata", "rdata" };
    private static final String[] MIME_TYPE = { "application/x-rlang-transport" };

    // Source text of the R scripts sent to Rserve. They start out empty and
    // are filled in by the static initializer below via readLocalResource().
    static private String RSCRIPT_CREATE_WORKSPACE = "";
    static private String RSCRIPT_DATASET_INFO_SCRIPT = "";
    static private String RSCRIPT_GET_DATASET = "";
    static private String RSCRIPT_GET_LABELS = "";
    static private String RSCRIPT_WRITE_DVN_TABLE = "";

    // Rserve connection settings, read from JVM system properties. Values
    // that are not set get defaults in the static initializer below.
    private static String RSERVE_HOST = System.getProperty("dataverse.rserve.host");
    private static String RSERVE_USER = System.getProperty("dataverse.rserve.user");
    private static String RSERVE_PASSWORD = System.getProperty("dataverse.rserve.pwrd");
    private static int RSERVE_PORT;

    // TODO:
    // we're not using these time/date formats for anything, are we?
    // NOTE(review): neither array is referenced anywhere else in this class.
    // SimpleDateFormat is not thread-safe, so these shared static instances
    // should not be used concurrently if they are ever put to use.
    // DATE FORMATS
    private static SimpleDateFormat[] DATE_FORMATS = new SimpleDateFormat[] { new SimpleDateFormat("yyyy-MM-dd") };

    // TIME FORMATS
    private static SimpleDateFormat[] TIME_FORMATS = new SimpleDateFormat[] {
            // Date-time up to milliseconds with timezone, e.g. 2013-04-08 13:14:23.102 -0500
            new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS z"),
            // Date-time up to milliseconds, e.g. 2013-04-08 13:14:23.102
            new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"),
            // Date-time up to seconds with timezone, e.g. 2013-04-08 13:14:23 -0500
            new SimpleDateFormat("yyyy-MM-dd HH:mm:ss z"),
            // Date-time up to seconds and no timezone, e.g. 2013-04-08 13:14:23
            new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") };

    // Logger, named after this class's package rather than the class itself.
    private static final Logger LOG = Logger.getLogger(RDATAFileReader.class.getPackage().getName());

    // Result object returned by read(), and the table metadata it carries.
    TabularDataIngest ingesteddata = new TabularDataIngest();
    private DataTable dataTable = new DataTable();

    // Random six-digit numeric tag assigned in the constructor; used by
    // RWorkspace.dataFile() as part of a generated file name.
    private String mPID;

    // Object containing all the information for an R-workspace (including
    // temporary directories on and off server)
    private RWorkspace mRWorkspace;

    // Number formatter; configured in init() (no grouping, up to 340
    // fraction digits).
    NumberFormat doubleNumberFormatter = new DecimalFormat();

    // Builds R Requests for an R-server
    private RRequestBuilder mRequestBuilder;
    /*
     * Initialize static variables: apply defaults to the Rserve connection
     * settings that were not supplied as system properties, and load the
     * R scripts that are later sent to Rserve.
     */
    static {
        // Defaults for connection settings that were not configured.
        if (RSERVE_HOST == null) {
            RSERVE_HOST = "localhost";
        }

        if (RSERVE_USER == null) {
            RSERVE_USER = "rserve";
        }

        if (RSERVE_PASSWORD == null) {
            RSERVE_PASSWORD = "rserve";
        }

        // The port lives in the same "dataverse.rserve.*" namespace as the
        // host/user/password settings above. Earlier code tested
        // "dataverse.ingest.rserve.port" for presence but parsed
        // "dataverse.rserve.port", which either ignored the configured port
        // or failed class initialization on Integer.parseInt(null). The
        // "ingest" name is still honoured as a fallback.
        String portSetting = System.getProperty("dataverse.rserve.port");
        if (portSetting == null) {
            portSetting = System.getProperty("dataverse.ingest.rserve.port");
        }

        int port = 6311;
        if (portSetting != null) {
            try {
                port = Integer.parseInt(portSetting.trim());
            } catch (NumberFormatException ex) {
                // A malformed setting should not make the whole class unloadable.
                LOG.warning(String.format(
                        "RDATAFileReader: invalid Rserve port \"%s\"; falling back to %d", portSetting, port));
            }
        }
        RSERVE_PORT = port;

        // Load R Scripts into memory, so that we can run them via R-serve
        RSCRIPT_WRITE_DVN_TABLE = readLocalResource("scripts/write.table.R");
        RSCRIPT_GET_DATASET = readLocalResource("scripts/get.dataset.R");
        RSCRIPT_CREATE_WORKSPACE = readLocalResource("scripts/create.workspace.R");
        RSCRIPT_GET_LABELS = readLocalResource("scripts/get.labels.R");
        RSCRIPT_DATASET_INFO_SCRIPT = readLocalResource("scripts/dataset.info.script.R");

        LOG.finer("R SCRIPTS AS STRINGS --------------");
        LOG.finer(RSCRIPT_WRITE_DVN_TABLE);
        LOG.finer(RSCRIPT_GET_DATASET);
        LOG.fine(RSCRIPT_CREATE_WORKSPACE);
        LOG.finer(RSCRIPT_GET_LABELS);
        LOG.finer(RSCRIPT_DATASET_INFO_SCRIPT);
        LOG.finer("END OF R SCRIPTS AS STRINGS -------");
    }

    /* 
     * TODO: 
     * Switch to the implementation in iq.dataverse.rserve
     * -- L.A. 4.0 alpha 1
    */
    private class RWorkspace {
        // mParent is the workspace directory reported by the R server (set in
        // create()). mWeb, mDvn and mDsb are only ever reset to "" in this
        // class -- NOTE(review): they look like leftovers; confirm before use.
        public String mParent, mWeb, mDvn, mDsb;
        // Handles for the Rdata file and the tab-delimited export. Both are
        // built from paths on the R server side.
        public File mDataFile, mCsvDataFile;
        // Most recent request built by destroy().
        public RRequest mRRequest;
        // Source stream of the uploaded Rdata file; set via stream().
        public BufferedInputStream mInStream;

        /**
         * Creates an empty workspace descriptor: all directory names are
         * blank and no files or input stream are attached yet.
         */
        public RWorkspace() {
            this.mDsb = "";
            this.mDvn = this.mDsb;
            this.mWeb = this.mDvn;
            this.mParent = this.mWeb;

            this.mDataFile = null;
            this.mCsvDataFile = null;
            this.mInStream = null;
        }

        /**
         * Creates the actual R workspace by evaluating the workspace-creation
         * script on the R server, then records the returned "parent"
         * directory and points {@code mDataFile} at {@code data.Rdata}
         * inside it.
         *
         * Failures are not propagated: they are logged (with the causing
         * exception) and the directory names are reset to empty strings.
         */
        public void create() {
            try {
                LOG.fine("RDATAFileReader: Creating R Workspace");
                RRequestBuilder scriptBuilder = mRequestBuilder.script(RSCRIPT_CREATE_WORKSPACE);
                LOG.fine("got a sript request builder");

                RRequest scriptRequest = scriptBuilder.build();
                LOG.fine("script request built.");

                REXP result = scriptRequest.eval();

                LOG.fine("evaluated the script");

                RList directoryNames = result.asList();

                mParent = null;

                if (directoryNames == null) {
                    LOG.fine("WARNING: directoryNames is null!");
                } else if (directoryNames.at("parent") != null) {
                    mParent = directoryNames.at("parent").asString();
                } else {
                    LOG.fine("WARNING: directoryNames at \"parent\" is null!");
                    if (directoryNames.isEmpty()) {
                        LOG.fine("WARNING: directoryNames is empty!");
                    } else {
                        // Dump the keys we did get, to help diagnose the script output.
                        for (Object key : directoryNames.keySet()) {
                            LOG.fine("directoryNames list key: " + key);
                        }
                    }
                }

                LOG.fine(String.format("RDATAFileReader: Parent directory of R Workspace is %s", mParent));

                LOG.fine("RDATAFileReader: Creating file handle");

                mDataFile = new File(mParent, "data.Rdata");
            } catch (Exception ex) {
                // Keep the original best-effort contract, but do not lose the cause.
                LOG.log(Level.WARNING, "RDATAFileReader: Could not create R workspace", ex);
                mParent = mWeb = mDvn = mDsb = "";
            }
        }

        /**
         * Removes the workspace directory on the R server by evaluating an
         * {@code unlink} call on {@code mParent}. Any failure is logged and
         * otherwise ignored.
         */
        public void destroy() {
            final String unlinkCommand = String.format("unlink(\"%s\", TRUE, TRUE)", mParent);

            try {
                LOG.fine("RDATAFileReader: Destroying R Workspace");

                RRequestBuilder unlinkBuilder = mRequestBuilder.script(unlinkCommand);
                mRRequest = unlinkBuilder.build();
                mRRequest.eval();

                LOG.fine("RDATAFileReader: DESTROYED R Workspace");
            } catch (Exception failure) {
                LOG.warning("RDATAFileReader: R Workspace was not destroyed");
                LOG.fine(failure.getMessage());
            }
        }

        /**
         * Copies a file from the R server into {@code DVN.dataframe.<pid>.Rdata}
         * under {@code mParent}, and makes that file the current
         * {@code mDataFile}.
         *
         * @param target path of the file to read through the R connection
         * @param prefix not used
         * @param size number of bytes to copy; nothing is copied when this is
         *        500 MB or more
         * @return the destination file handle, also when the copy failed
         *         (failures are logged, not thrown)
         */
        public File dataFile(String target, String prefix, int size) {

            String fileName = String.format("DVN.dataframe.%s.Rdata", mPID);

            mDataFile = new File(mParent, fileName);

            RFileInputStream rInStream = null;
            OutputStream outStream = null;

            RRequest req = mRequestBuilder.build();

            try {
                outStream = new BufferedOutputStream(new FileOutputStream(mDataFile));
                rInStream = req.getRConnection().openFile(target);

                if (size < 1024 * 1024 * 500) {
                    // A single read() is not guaranteed to deliver all requested
                    // bytes, so copy in chunks and write only what was received.
                    byte[] chunk = new byte[8192];
                    int remaining = size;
                    while (remaining > 0) {
                        int received = rInStream.read(chunk, 0, Math.min(chunk.length, remaining));
                        if (received <= 0) {
                            break;
                        }
                        outStream.write(chunk, 0, received);
                        remaining -= received;
                    }
                } else {
                    LOG.warning(String.format(
                            "RDATAFileReader: not copying `%s`: size %d exceeds the 500 MB limit", target, size));
                }
            } catch (FileNotFoundException exc) {
                LOG.log(Level.WARNING, "RDATAFileReader: FileNotFound exception occurred", exc);
            } catch (IOException exc) {
                LOG.log(Level.WARNING, "RDATAFileReader: IO exception occurred", exc);
            } finally {
                // Close R input data stream
                if (rInStream != null) {
                    try {
                        rInStream.close();
                    } catch (IOException ignored) {
                        // nothing useful can be done if closing the remote file fails
                    }
                }

                // Close output data stream; closing flushes the buffer, so a
                // failure here means the local copy may be incomplete.
                if (outStream != null) {
                    try {
                        outStream.close();
                    } catch (IOException exc) {
                        LOG.log(Level.WARNING, "RDATAFileReader: IO exception occurred", exc);
                    }
                }
            }

            return mDataFile;
        }

        /**
         * Attaches the input stream that {@code saveRdataFile()} will read
         * the Rdata content from.
         *
         * @param inStream stream to remember; stored as-is
         */
        public void stream(BufferedInputStream inStream) {
            this.mInStream = inStream;
        }

        /**
         * Uploads the attached input stream to the R server, writing it to
         * the path held in {@code mDataFile}.
         *
         * Connection and I/O failures are logged rather than thrown.
         *
         * @return {@code mDataFile}, or {@code null} when no input stream
         *         has been attached via {@code stream()}
         */
        private File saveRdataFile() {
            LOG.fine("RDATAFileReader: Saving Rdata File from Input Stream");

            if (mInStream == null) {
                LOG.fine("RDATAFileReader: No input stream was specified. Not writing file and returning NULL");
                return null;
            }

            RFileOutputStream outStream = null;
            RConnection rServerConnection = null;

            try {
                try {
                    LOG.fine("RDATAFileReader: Opening R connection");
                    rServerConnection = new RConnection(RSERVE_HOST, RSERVE_PORT);

                    LOG.fine("RDATAFileReader: Logging into R connection");
                    rServerConnection.login(RSERVE_USER, RSERVE_PASSWORD);

                    LOG.fine("RDATAFileReader: Attempting to create file");
                    outStream = rServerConnection.createFile(mDataFile.getAbsolutePath());

                    LOG.fine(String.format("RDATAFileReader: File created on server at %s",
                            mDataFile.getAbsolutePath()));
                } catch (IOException ex) {
                    LOG.warning("RDATAFileReader: Could not create file on R Server");
                } catch (RserveException ex) {
                    LOG.warning("RDATAFileReader: Could not connect to R Server");
                }

                if (outStream == null) {
                    // The remote file could not be opened; nothing to write to.
                    LOG.warning("RDATAFileReader: Data file has not been specified");
                } else {
                    try {
                        // Read from the local stream and write to the R server 1kb
                        // at a time. Only the bytes actually read are written:
                        // writing the whole buffer would append stale bytes after
                        // any short read, including the final one.
                        byte[] buffer = new byte[1024];
                        int bytesRead;
                        while ((bytesRead = mInStream.read(buffer)) != -1) {
                            outStream.write(buffer, 0, bytesRead);
                        }
                    } catch (IOException ex) {
                        LOG.warning("RDATAFileReader: Could not write to file");
                        LOG.fine(String.format("Error message: %s", ex.getMessage()));
                    }
                }
            } finally {
                // Close the remote file before the connection it belongs to.
                if (outStream != null) {
                    try {
                        outStream.close();
                    } catch (IOException ex) {
                        LOG.warning("RDATAFileReader: Could not write to file");
                        LOG.fine(String.format("Error message: %s", ex.getMessage()));
                    }
                }

                // Closing R server connection
                if (rServerConnection != null) {
                    LOG.fine("RDATAFileReader: Closing R server connection");
                    rServerConnection.close();
                }
            }

            return mDataFile;
        }

        /**
         * Asks the R server to load the uploaded Rdata file and write its
         * data set out as {@code data.csv}, next to the Rdata file.
         *
         * @return handle for the file written on the R server
         */
        private File saveCsvFile() {
            // The export goes into the same directory as the Rdata file.
            String workspaceDir = mRWorkspace.getRdataFile().getParent();
            mCsvDataFile = new File(workspaceDir, "data.csv");

            // Assemble the script one statement per line.
            String loadStatement = String.format("load(\"%s\")", mRWorkspace.getRdataAbsolutePath());
            String writeStatement = String.format("write.dvn.table(data.set, file=\"%s\")",
                    mCsvDataFile.getAbsolutePath());
            String csvScript = String.join("\n",
                    "options(digits.secs=3)",
                    RSCRIPT_WRITE_DVN_TABLE,
                    loadStatement,
                    RSCRIPT_GET_DATASET,
                    writeStatement);

            RRequest csvRequest = mRequestBuilder.build();

            LOG.fine(String.format("RDATAFileReader: Attempting to write table to `%s`",
                    mCsvDataFile.getAbsolutePath()));
            csvRequest.script(csvScript).eval();

            return mCsvDataFile;
        }

        /**
         * Returns the handle of the Rdata file on the R server.
         *
         * @return the current {@code mDataFile}; may be {@code null} if it
         *         has not been set yet
         */
        public File getRdataFile() {
            return this.mDataFile;
        }

        /**
         * Returns the location of the Rdata file on the R server.
         *
         * @return absolute path of {@code mDataFile}, as a string on the
         *         (potentially) remote R server
         */
        public String getRdataAbsolutePath() {
            final File rdataFile = this.mDataFile;
            return rdataFile.getAbsolutePath();
        }
    }

    /**
     * Constructs a <code>RDATAFileReader</code> instance from its "Spi" class.
     *
     * Sets up the request builder with the configured Rserve connection
     * settings, an empty R workspace descriptor, and a random numeric tag.
     *
     * @param originator a <code>TabularDataFileReaderSpi</code> object.
     */
    public RDATAFileReader(TabularDataFileReaderSpi originator) {

        super(originator);

        LOG.fine("RDATAFileReader: INSIDE RDATAFileReader");

        // The request builder serves as the RRequest factory for this reader.
        mRequestBuilder = new RRequestBuilder()
                .host(RSERVE_HOST)
                .port(RSERVE_PORT)
                .user(RSERVE_USER)
                .password(RSERVE_PASSWORD);

        // Random six-digit tag used in generated file names.
        mPID = RandomStringUtils.randomNumeric(6);

        // Workspace descriptor; the workspace itself is created in read().
        mRWorkspace = new RWorkspace();
    }

    /**
     * Configures the number formatter: up to 340 fraction digits and no
     * grouping separators.
     *
     * @throws IOException declared for callers; not thrown by this body
     */
    private void init() throws IOException {
        doubleNumberFormatter.setMaximumFractionDigits(340);
        doubleNumberFormatter.setGroupingUsed(false);
    }

    /**
     * Reads the given RData file.
     *
     * The stream is uploaded to the R server, exported there as a
     * tab-delimited file, copied back, and post-processed into the final
     * TAB file; variable metadata is collected along the way.
     *
     * @param stream a <code>BufferedInputStream</code> with the RData content.
     * @param dataFile not used by this reader
     * @return a <code>TabularDataIngest</code> object holding the TAB file
     *         and the populated data table
     * @throws java.io.IOException if anything goes wrong during ingest; the
     *         original exception is attached as the cause.
     */
    @Override
    public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException {

        init();

        // Create Request object
        LOG.fine("RDATAFileReader: Creating RRequest object from RRequestBuilder object");

        try {
            // Create R Workspace
            mRWorkspace.stream(stream);
            mRWorkspace.create();
            mRWorkspace.saveRdataFile();
            mRWorkspace.saveCsvFile();

            // Copy the tab-delimited file written by R to a local temporary file
            File localCsvFile = transferCsvFile(mRWorkspace.mCsvDataFile);
            if (localCsvFile == null) {
                throw new IOException("could not create a local copy of the tab-delimited file written by R");
            }

            // Generate and save all the information about data set; this creates all 
            // the DataVariable objects, among other things:
            getDataFrameInformation();

            // Read and parse the TAB-delimited file saved by R, above; do the 
            // necessary post-processing and filtering, and save the resulting 
            // TAB file as tabFileDestination, below. This is the file we'll be 
            // using to calculate the UNF, and for the storage/preservation of the
            // dataset. 
            // IMPORTANT: this must be done *after* the variable metadata has been 
            // created!
            // - L.A. 
            RTabFileParser csvFileReader = new RTabFileParser('\t');
            File tabFileDestination = File.createTempFile("data-", ".tab");

            int lineCount;
            // Both ends are closed (and the writer flushed) even if parsing fails.
            try (BufferedReader localBufferedReader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(localCsvFile), "UTF-8"));
                    PrintWriter tabFileWriter = new PrintWriter(tabFileDestination.getAbsolutePath(), "UTF-8")) {

                lineCount = csvFileReader.read(localBufferedReader, dataTable, tabFileWriter);

                // PrintWriter never throws on write errors; ask it explicitly.
                if (tabFileWriter.checkError()) {
                    throw new IOException("failed to write tab-delimited file " + tabFileDestination.getAbsolutePath());
                }
            }

            LOG.fine("RDATAFileReader: successfully read " + lineCount + " lines of tab-delimited data.");

            dataTable.setUnf("UNF:pending");

            ingesteddata.setTabDelimitedFile(tabFileDestination);
            ingesteddata.setDataTable(dataTable);
        } catch (Exception ex) {
            throw new IOException("Unknown exception occured during ingest; " + ex.getMessage(), ex);
        } finally {
            // Destroy R workspace, on success and on failure alike -- but only
            // if a workspace directory was actually reported by the server.
            String workspaceDirectory = mRWorkspace.mParent;
            if (workspaceDirectory != null && !workspaceDirectory.isEmpty()) {
                mRWorkspace.destroy();
            }
        }

        LOG.fine("RDATAFileReader: Leaving \"read\" function");

        return ingesteddata;
    }

    /**
     * Copies a remote file on the R server to a local temporary file.
     *
     * A failed transfer is logged as a warning; the (possibly incomplete)
     * local file is still returned in that case.
     *
     * @param target a target on the remote r-server
     * @return the local temporary file, or <code>null</code> if it could not
     *         be created
     */
    private File transferCsvFile(File target) {
        File destination;
        FileOutputStream csvDestinationStream;

        try {
            destination = File.createTempFile("data", ".csv");
            LOG.fine(String.format("RDATAFileReader: Writing local CSV File to `%s`",
                    destination.getAbsolutePath()));
            csvDestinationStream = new FileOutputStream(destination);
        } catch (IOException ex) {
            LOG.warning("RDATAFileReader: Could not create temporary file!");
            return null;
        }

        RConnection rServeConnection = null;
        RFileInputStream rServeInputStream = null;

        try {
            // Open connection to R-serve
            rServeConnection = new RConnection(RSERVE_HOST, RSERVE_PORT);
            rServeConnection.login(RSERVE_USER, RSERVE_PASSWORD);

            // Open file for reading from R-serve
            rServeInputStream = rServeConnection.openFile(target.getAbsolutePath());

            LOG.fine("RDATAFileReader: Beginning to write to local destination file");

            // Copy in chunks rather than one byte per read call
            byte[] buffer = new byte[8192];
            int bytesRead;
            while ((bytesRead = rServeInputStream.read(buffer)) > 0) {
                // Write to the *local* destination file
                csvDestinationStream.write(buffer, 0, bytesRead);
            }

            LOG.fine(String.format("RDATAFileReader: Finished writing from destination `%s`",
                    target.getAbsolutePath()));
            LOG.fine(String.format("RDATAFileReader: Finished copying to source `%s`",
                    destination.getAbsolutePath()));
        } catch (Exception ex) {
            // Still best-effort, but no longer silent: the caller would
            // otherwise go on to parse an empty or truncated file unnoticed.
            LOG.log(Level.WARNING,
                    "RDATAFileReader: Could not copy `" + target + "` from the R server to a local file", ex);
        } finally {
            if (rServeInputStream != null) {
                try {
                    rServeInputStream.close();
                } catch (IOException ignored) {
                    // nothing useful can be done if closing the remote file fails
                }
            }

            try {
                csvDestinationStream.close();
            } catch (IOException ex) {
                LOG.log(Level.WARNING, "RDATAFileReader: Could not close local CSV file", ex);
            }

            if (rServeConnection != null) {
                LOG.fine("RDATAFileReader: Closing CSVFileReader R Connection");
                rServeConnection.close();
            }
        }

        return destination;
    }

    /**
     * Runs an R-script that extracts meta-data from the *original* Rdata
     * object, then parses its output and creates DataVariable objects.
     *
     * On return the data table holds one DataVariable per variable name
     * reported by R, the variable count, and - when R reports a positive
     * value - the case count.
     *
     * Failures are logged and not propagated; the data table may then be
     * left partially populated.
     */
    private void getDataFrameInformation() {
        LOG.fine("RDATAFileReader: Entering `getDataFrameInformation` function");

        String parentDirectory = mRWorkspace.getRdataFile().getParent();

        String fileInfoScript = new StringBuilder("")
                .append(String.format("load(\"%s\")\n", mRWorkspace.getRdataAbsolutePath()))
                .append(String.format("setwd(\"%s\")\n", parentDirectory)).append(RSCRIPT_GET_DATASET).append("\n")
                .append(RSCRIPT_DATASET_INFO_SCRIPT).toString();

        try {
            RRequest request = mRequestBuilder.build();
            request.script(fileInfoScript);
            RList fileInformation = request.eval().asList();

            RList metaInfo = fileInformation.at("meta.info").asList();

            String[] variableNames = fileInformation.at("varNames").asStrings();

            // Initialize variables: 
            List<DataVariable> variableList = new ArrayList<>();
            int varQnty = 0;

            for (String varName : variableNames) {
                DataVariable dv = new DataVariable();
                dv.setName(varName);
                dv.setLabel(varName);
                // TODO:
                // Check if variables have real descriptive labels defined, 
                // via the mechanism provided by that special optional package... 
                // (?) -- L.A. 
                dv.setInvalidRanges(new ArrayList<>());
                dv.setSummaryStatistics(new ArrayList<>());
                dv.setUnf("UNF:6:XYZXYZXYZ");
                dv.setCategories(new ArrayList<>());
                variableList.add(dv);

                // Position of the variable within the file, starting at 0
                dv.setFileOrder(varQnty);

                dv.setDataTable(dataTable);

                varQnty++;
            }

            dataTable.setVarQuantity(Long.valueOf((long) varQnty));
            dataTable.setDataVariables(variableList);

            // Fill in type/format/category details for each variable
            processVariableInfo(metaInfo, dataTable);

            if (fileInformation.at("caseQnty") != null) {
                int caseQuantity = 0;
                try {
                    caseQuantity = fileInformation.at("caseQnty").asInteger();
                } catch (REXPMismatchException rexp) {
                    // bummer! - but not fatal. 
                    LOG.log(Level.FINE, "RDATAFileReader: caseQnty is not an integer", rexp);
                }
                if (caseQuantity > 0) {
                    dataTable.setCaseQuantity(Long.valueOf((long) caseQuantity));
                }
            }
        } catch (REXPMismatchException ex) {
            LOG.log(Level.WARNING, "RDATAFileReader: Could not put information correctly", ex);
        } catch (Exception ex) {
            // Route the stack trace through the logger instead of stderr.
            LOG.log(Level.WARNING, ex.getMessage(), ex);
        }

    }

    /**
     * Read a Local Resource and Return Its Contents as a String
     * <code>readLocalResource</code> looks up a resource relative to the class
     * <code>RDATAFileReader</code> and returns its contents as a string, with
     * every line terminated by a single newline character.
     *
     * @param path String specifying the name of the local resource to be
     * decoded as UTF-8.
     * @return the resource contents; an empty string if the resource does not
     * exist, or whatever was read so far if reading fails part-way
     */
    private static String readLocalResource(String path) {
        // Debug
        LOG.fine(String.format("RDATAFileReader: readLocalResource: reading local path \"%s\"", path));

        // Get stream; this is null when the resource cannot be found
        InputStream resourceStream = RDATAFileReader.class.getResourceAsStream(path);
        if (resourceStream == null) {
            // This method runs from the static initializer, where an uncaught
            // NullPointerException would make the whole class unloadable.
            LOG.warning(String.format(
                    "RDATAFileReader: (readLocalResource) resource at path \"%s\" was not found", path));
            return "";
        }

        StringBuilder contents = new StringBuilder();

        // Both the raw stream and the reader are closed on every exit path
        try (InputStream in = resourceStream;
                BufferedReader rd = new BufferedReader(new InputStreamReader(in, "UTF-8"))) {
            String line;
            while ((line = rd.readLine()) != null) {
                contents.append(line).append('\n');
            }
        } catch (IOException ex) {
            LOG.warning(String.format(
                    "RDATAFileReader: (readLocalResource) resource stream from path \"%s\" was invalid", path));
        }

        // Return string
        return contents.toString();
    }

    /**
     * Applies the per-column meta-data reported by R to the variables of the
     * data table: variable type, interval, format and, for factors and
     * logicals, the list of categories.
     *
     * Column <code>k</code> of <code>metaInfo</code> describes variable
     * <code>k</code> of the data table. A column whose meta-data cannot be
     * read is logged and skipped; the remaining columns are still processed.
     *
     * @param metaInfo an "RList" Object containing indices - type, type.string,
     * class, levels, and format.
     * @param dataTable a dataverse DataTable object whose data variables have
     * already been created
     * @throws IOException declared for callers; not thrown by this body
     */
    private void processVariableInfo(RList metaInfo, DataTable dataTable) throws IOException {
        // list(type = 1, type.string = "integer", class = class(values), levels = NULL, format = NULL)

        for (int k = 0; k < metaInfo.size(); k++) {

            // Meta-data without a matching variable cannot be applied.
            if (k >= dataTable.getDataVariables().size()) {
                LOG.warning(String.format(
                        "RDATAFileReader: meta-data has %d columns, but the data table has only %d variables",
                        metaInfo.size(), dataTable.getDataVariables().size()));
                break;
            }

            DataVariable variable = dataTable.getDataVariables().get(k);

            try {
                // Meta-data for a column in the data-set
                REXP columnExpression = metaInfo.at(k);
                if (!hasValue(columnExpression)) {
                    LOG.fine(String.format("Could not process variable %d of the data frame.", k));
                    continue;
                }
                RList columnMeta = columnExpression.asList();

                // Extract information from the returned list; entries that
                // are absent are treated like R NULLs.
                REXP typeExpression = columnMeta.at("type");
                REXP typeNameExpression = columnMeta.at("type.string");
                REXP levelsExpression = columnMeta.at("levels");
                REXP formatExpression = columnMeta.at("format");

                Integer variableType = hasValue(typeExpression)
                        ? Integer.valueOf(typeExpression.asInteger()) : null;
                String variableTypeName = hasValue(typeNameExpression) ? typeNameExpression.asString() : null;
                String[] variableLevels = hasValue(levelsExpression) ? levelsExpression.asStrings() : null;
                if (variableLevels == null) {
                    variableLevels = new String[0];
                }
                String variableFormat = hasValue(formatExpression) ? formatExpression.asString() : null;

                LOG.fine("variable type: " + variableType);
                LOG.fine("variable type name: " + variableTypeName);
                LOG.fine("variable format: " + variableFormat);

                for (String variableLevel : variableLevels) {
                    LOG.fine("variable level: " + variableLevel);
                }

                if (variableTypeName == null || variableTypeName.equals("character")
                        || variableTypeName.equals("other")) {
                    // This is a String: 
                    variable.setTypeCharacter();
                    variable.setIntervalDiscrete();

                } else if (variableTypeName.equals("integer")) {
                    variable.setTypeNumeric();
                    variable.setIntervalDiscrete();

                } else if (variableTypeName.equals("numeric") || variableTypeName.equals("double")) {
                    variable.setTypeNumeric();
                    variable.setIntervalContinuous();

                } else if (variableTypeName.startsWith("Date")) {
                    variable.setTypeCharacter();
                    variable.setIntervalDiscrete();
                    variable.setFormat(variableFormat);

                    if (variableTypeName.equals("Date")) {
                        variable.setFormatCategory("date");
                    } else if (variableTypeName.equals("DateTime")) {
                        variable.setFormatCategory("time");
                    }

                } else if (variableTypeName.equals("factor")) {

                    // All R factors are *string* factors!
                    variable.setTypeCharacter();
                    variable.setIntervalDiscrete();
                    if (variableLevels.length > 0) {
                        // yes, this is a factor, with levels defined.
                        LOG.fine("this is a factor.");
                        boolean ordered = false;

                        if (variableFormat != null && variableFormat.equals("ordered")) {
                            LOG.fine("an ordered factor, too");
                            ordered = true;
                        }

                        for (int i = 0; i < variableLevels.length; i++) {
                            VariableCategory cat = new VariableCategory();
                            cat.setValue(variableLevels[i]);
                            // Sadly, R factors don't have descriptive labels;
                            cat.setLabel(variableLevels[i]);

                            if (ordered) {
                                // category order is 1-based
                                cat.setOrder(i + 1);
                            }

                            /* cross-link the variable and category to each other: */
                            cat.setDataVariable(variable);
                            variable.getCategories().add(cat);
                        }

                        variable.setOrderedCategorical(ordered);

                    }

                } // And finally, a special case for logical variables: 
                  // For all practical purposes, they are handled as numeric factors
                  // with 0 and 1 for the values and "FALSE" and "TRUE" for the labels.
                  // (so this can also be used as an example of ingesting a *numeric* 
                  // categorical variable - as opposed to *string* categoricals, that
                  // we turn R factors into - above.
                else if ("logical".equals(variableTypeName)) {
                    variable.setFormatCategory("Boolean");

                    variable.setTypeNumeric();
                    variable.setIntervalDiscrete();

                    String[] booleanFactorLabels = { "FALSE", "TRUE" };
                    String[] booleanFactorValues = { "0", "1" };

                    for (int i = 0; i < 2; i++) {
                        VariableCategory cat = new VariableCategory();
                        cat.setValue(booleanFactorValues[i]);
                        cat.setLabel(booleanFactorLabels[i]);

                        /* cross-link the variable and category to each other: */
                        cat.setDataVariable(variable);
                        variable.getCategories().add(cat);
                    }
                }

            } catch (REXPMismatchException ex) {
                // If something went wrong, then it wasn't meant to be for that column.
                // And you know what? That's okay.
                LOG.log(Level.FINE, String.format("Could not process variable %d of the data frame.", k), ex);
            }
        }
    }

    /**
     * Tells whether an R expression is present and is not an R NULL.
     */
    private static boolean hasValue(REXP expression) {
        return expression != null && !expression.isNull();
    }
}