Example usage for java.util Arrays deepToString

Introduction

This page collects example usages of java.util.Arrays.deepToString taken from open-source projects.

Prototype

public static String deepToString(Object[] a)

Source Link

Document

Returns a string representation of the "deep contents" of the specified array.

Usage

From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.dta.DTA117FileReader.java

/**
 * Reads the data section of the file and writes it out as a temporary
 * tab-delimited file, one output line per observation.
 *
 * <p>For every observation, each variable is read from {@code reader}
 * according to its entry in {@code variableTypes} ("Byte", "Integer",
 * "Long", "Float", "Double", "STR&lt;n&gt;" or "STRL"). Values recognized as
 * missing are replaced with {@code MissingValueForTabDelimitedFile}; numeric
 * values whose format category is "time" or "date" are passed through
 * {@code decodeDateTimeData} and stored in decoded form. For STRL columns
 * only the "v,o" reference pair is stored in the row; pairs that do not
 * point at the current cell are registered in {@code cachedGSOs} with an
 * empty placeholder value.
 *
 * <p>The temporary file is registered with {@code ingesteddata} before any
 * data is written. The output streams are closed even if reading fails.
 *
 * @param reader the reader positioned at the opening tag of the data section
 * @throws IOException if a variable type is undefined or unknown, if the
 *         number of bytes consumed for a row differs from the calculated
 *         row length, if the tab-delimited file cannot be written, or if
 *         the underlying reader fails
 */
private void readData(DataReader reader) throws IOException {
    logger.fine("Data section; at offset " + reader.getByteOffset() + "; dta map offset: "
            + dtaMap.getOffset_data());
    logger.fine("readData(): start");
    reader.readOpeningTag(TAG_DATA);
    // TODO: 
    // check that we are at the right byte offset!

    int nvar = dataTable.getVarQuantity().intValue();
    int nobs = dataTable.getCaseQuantity().intValue();

    int[] variableByteLengths = getVariableByteLengths(variableTypes);
    int bytes_per_row = calculateBytesPerRow(variableByteLengths);

    logger.fine("data dimensions[observations x variables] = (" + nobs + "x" + nvar + ")");
    logger.fine("bytes per row=" + bytes_per_row + " bytes");
    logger.fine("variableTypes=" + Arrays.deepToString(variableTypes));

    // create a File object to save the tab-delimited data file
    File tabDelimitedDataFile = File.createTempFile("tempTabfile.", ".tab");

    // save the temp tab-delimited file in the return ingest object:
    ingesteddata.setTabDelimitedFile(tabDelimitedDataFile);

    // try-with-resources: the streams must be released even when one of the
    // many IOExceptions below aborts the read half-way through the data.
    try (FileOutputStream fileOutTab = new FileOutputStream(tabDelimitedDataFile);
            PrintWriter pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true)) {

        logger.fine("Beginning to read data stream.");

        for (int i = 0; i < nobs; i++) {
            Object[] dataRow = new Object[nvar];

            // TODO: 
            // maybe intercept any potential exceptions here, and add more 
            // diagnostic info, before re-throwing...

            // Number of bytes consumed for this row so far; verified against
            // bytes_per_row once every column has been read.
            int byte_offset = 0;
            for (int columnCounter = 0; columnCounter < nvar; columnCounter++) {

                String varType = variableTypes[columnCounter];

                // 4.0 Check if this is a time/date variable: 
                boolean isDateTimeDatum = false;
                // TODO: 
                // make sure the formats are properly set! -- use the old 
                // plugin as a model... 
                String formatCategory = dataTable.getDataVariables().get(columnCounter).getFormatCategory();
                if (formatCategory != null && (formatCategory.equals("time") || formatCategory.equals("date"))) {
                    isDateTimeDatum = true;
                }

                // TODO: 
                // ditto
                String variableFormat = dateVariableFormats[columnCounter];

                if (varType == null || varType.equals("")) {
                    throw new IOException("Undefined variable type encountered in readData()");
                }

                // TODO: 
                // double-check that the missing values constants are still correct!
                if (varType.equals("Byte")) {
                    // (signed) Byte
                    byte byte_datum = reader.readSignedByte();

                    logger.fine(i + "-th row " + columnCounter + "=th column byte =" + byte_datum);
                    if (byte_datum >= BYTE_MISSING_VALUE) {
                        logger.fine(i + "-th row " + columnCounter + "=th column byte MV=" + byte_datum);
                        dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                    } else {
                        dataRow[columnCounter] = byte_datum;
                        logger.fine(i + "-th row " + columnCounter + "-th column byte value=" + byte_datum);
                    }

                    byte_offset++;
                } else if (varType.equals("Integer")) {
                    // stata-int: read as a 2-byte signed value
                    short short_datum = (short) reader.readShortSignedInteger();

                    logger.fine(i + "-th row " + columnCounter + "=th column stata int =" + short_datum);

                    if (short_datum >= INT_MISSIG_VALUE) {
                        logger.fine(i + "-th row " + columnCounter + "=th column stata long missing value="
                                + short_datum);
                        dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                    } else {

                        if (isDateTimeDatum) {

                            DecodedDateTime ddt = decodeDateTimeData("short", variableFormat,
                                    Short.toString(short_datum));
                            logger.fine(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format="
                                    + ddt.format);
                            dataRow[columnCounter] = ddt.decodedDateTime;
                            dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);

                        } else {
                            dataRow[columnCounter] = short_datum;
                            logger.fine(
                                    i + "-th row " + columnCounter + "-th column \"integer\" value=" + short_datum);
                        }
                    }
                    byte_offset += 2;
                } else if (varType.equals("Long")) {
                    // stata-Long (= java's int: 4 byte), signed.

                    int int_datum = reader.readSignedInteger();

                    if (int_datum >= LONG_MISSING_VALUE) {
                        dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                    } else {
                        if (isDateTimeDatum) {
                            DecodedDateTime ddt = decodeDateTimeData("int", variableFormat,
                                    Integer.toString(int_datum));
                            logger.fine(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format="
                                    + ddt.format);
                            dataRow[columnCounter] = ddt.decodedDateTime;
                            dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);

                        } else {
                            dataRow[columnCounter] = int_datum;
                            logger.fine(i + "-th row " + columnCounter + "-th column \"long\" value=" + int_datum);
                        }

                    }
                    byte_offset += 4;
                } else if (varType.equals("Float")) {
                    // STATA float 
                    // same as Java float - 4-byte

                    float float_datum = reader.readFloat();

                    logger.fine(i + "-th row " + columnCounter + "=th column float =" + float_datum);
                    if (FLOAT_MISSING_VALUE_SET.contains(float_datum)) {
                        logger.fine(
                                i + "-th row " + columnCounter + "=th column float missing value=" + float_datum);
                        dataRow[columnCounter] = MissingValueForTabDelimitedFile;

                    } else {

                        if (isDateTimeDatum) {
                            DecodedDateTime ddt = decodeDateTimeData("float", variableFormat,
                                    doubleNumberFormatter.format(float_datum));
                            logger.fine(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format="
                                    + ddt.format);
                            dataRow[columnCounter] = ddt.decodedDateTime;
                            dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);
                        } else {
                            dataRow[columnCounter] = float_datum;
                            logger.fine(i + "-th row " + columnCounter + "=th column float value:" + float_datum);
                            // This may be temporary - but for now (as in, while I'm testing 
                            // 4.0 ingest against 3.* ingest, I need to be able to tell if a 
                            // floating point value was a single, or double float in the 
                            // original STATA file: -- L.A. Jul. 2014
                            dataTable.getDataVariables().get(columnCounter).setFormat("float");
                            // ?
                        }

                    }
                    byte_offset += 4;
                } else if (varType.equals("Double")) {
                    // STATA double
                    // same as Java double - 8-byte

                    double double_datum = reader.readDouble();

                    if (DOUBLE_MISSING_VALUE_SET.contains(double_datum)) {
                        logger.finer(
                                i + "-th row " + columnCounter + "=th column double missing value=" + double_datum);
                        dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                    } else {

                        if (isDateTimeDatum) {
                            DecodedDateTime ddt = decodeDateTimeData("double", variableFormat,
                                    doubleNumberFormatter.format(double_datum));
                            logger.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format="
                                    + ddt.format);
                            dataRow[columnCounter] = ddt.decodedDateTime;
                            dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);
                        } else {
                            logger.fine(i + "-th row " + columnCounter + "=th column double value:" + double_datum);

                            dataRow[columnCounter] = double_datum;
                        }

                    }
                    byte_offset += 8;
                } else if (varType.matches("^STR[1-9][0-9]*")) {
                    // Fixed-length string case
                    int strVarLength = variableByteLengths[columnCounter];
                    logger.fine(
                            i + "-th row " + columnCounter + "=th column is a string (" + strVarLength + " bytes)");
                    // (old) TODO: 
                    // is it the right thing to do, to default to "ISO-8859-1"?
                    // (it may be; since there's no mechanism for specifying
                    // alternative encodings in Stata, this may be their default;
                    // it just needs to be verified. -- L.A. Jul. 2014)
                    // ACTUALLY, in STATA13, it appears that STRF *MUST*
                    // be limited to ASCII. Binary strings can be stored as 
                    // STRLs. (Oct. 6 2014)

                    String string_datum = reader.readString(strVarLength);
                    // Keep the log readable: only the first 64 characters of long values are logged.
                    if (string_datum.length() < 64) {
                        logger.fine(i + "-th row " + columnCounter + "=th column string =" + string_datum);
                    } else {
                        logger.fine(i + "-th row " + columnCounter + "=th column string ="
                                + string_datum.substring(0, 64) + "... (truncated)");
                    }
                    if (string_datum.equals("")) {

                        logger.fine(
                                i + "-th row " + columnCounter + "=th column string missing value=" + string_datum);

                        // TODO: 
                        /* Is this really a missing value case? 
                         * Or is it an honest empty string? 
                         * Is there such a thing as a missing value for a String in Stata?
                         * -- L.A. 4.0
                         */
                        dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                    } else {
                        /*
                         * Some special characters, like new lines and tabs need to 
                         * be escaped - otherwise they will break our TAB file 
                         * structure! 
                         */

                        dataRow[columnCounter] = escapeCharacterString(string_datum);
                    }
                    byte_offset += strVarLength;
                } else if (varType.equals("STRL")) {
                    logger.fine("STRL encountered.");

                    if (cachedGSOs == null) {
                        cachedGSOs = new LinkedHashMap<>();
                    }

                    // Reading the (v,o) pair: first v, then o; each is counted as 4 bytes.
                    long v = reader.readInteger();
                    byte_offset += 4;

                    long o = reader.readInteger();
                    byte_offset += 4;

                    // create v,o pair; save, for now:
                    String voPair = v + "," + o;
                    dataRow[columnCounter] = voPair;

                    // TODO: 
                    // Validate v and o? 
                    // Making sure v <= varNum and o < numbObs; 
                    // or, if o == numObs, v <= columnCounter; 
                    // -- per the Stata 13 spec...

                    if (!(v == columnCounter + 1 && o == i + 1)) {
                        if (!cachedGSOs.containsKey(voPair)) {
                            cachedGSOs.put(voPair, "");
                            // this means we need to cache this GSO, when 
                            // we read the STRLS section later on. 
                        }
                    }

                } else {
                    logger.warning("unknown variable type found: " + varType);
                    String errorMessage = "unknown variable type encounted when reading data section: " + varType;
                    throw new IOException(errorMessage);

                }
            } // for (columnCounter)

            if (byte_offset != bytes_per_row) {
                throw new IOException("Unexpected number of bytes read for data row " + i + "; " + bytes_per_row
                        + " expected, " + byte_offset + " read.");
            }

            // Dump the row of data to the tab-delimited file:
            pwout.println(StringUtils.join(dataRow, "\t"));

            logger.fine("finished reading " + i + "-th row");

        } // for (rows)

        // PrintWriter never throws on write failures; it only records them.
        // Surface them here so a truncated tab file is not silently accepted.
        if (pwout.checkError()) {
            throw new IOException(
                    "Failed to write the tab-delimited data file " + tabDelimitedDataFile.getAbsolutePath());
        }
    }

    reader.readClosingTag(TAG_DATA);
    logger.fine("DTA117 Ingest: readData(): end.");

}

From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.dta.DTAFileReader.java

/**
 * Reads the variable type list of the descriptor (one type byte per
 * variable) and derives the per-variable type information from it.
 *
 * <p>For every type byte this method adds the variable's byte length to
 * {@code bytes_per_row}, stores the type label in {@code variableTypes},
 * and marks the matching entry of {@code dataTable.getDataVariables()} as
 * numeric/discrete ("Byte", "Integer", "Long"), numeric/continuous
 * ("Float", "Double") or character/discrete (strings). For string
 * variables the length is also recorded in {@code StringLengthTable}.
 *
 * @param stream the input stream positioned at the start of the type list
 * @param nvar   the number of variables, i.e. the number of type bytes to read
 * @throws IOException if the stream ends before {@code nvar} type bytes
 *         could be read, or if a type byte cannot be mapped to a known type
 */
private void decodeDescriptorVarTypeList(BufferedInputStream stream, int nvar) throws IOException {
    byte[] typeList = new byte[nvar];

    // A single read() call may return fewer bytes than requested, and it
    // reports end-of-stream as -1 rather than 0; keep reading until all
    // nvar type bytes are in, so a truncated descriptor is never decoded
    // from a partially filled (zero-padded) buffer.
    int nbytes = 0;
    while (nbytes < nvar) {
        int justRead = stream.read(typeList, nbytes, nvar - nbytes);
        if (justRead <= 0) {
            break;
        }
        nbytes += justRead;
    }
    if (nbytes == 0) {
        throw new IOException("reading the descriptior: no byte was read");
    }
    if (nbytes < nvar) {
        throw new IOException("reading the descriptor: unexpected end of stream; " + nvar
                + " type bytes expected, " + nbytes + " read");
    }
    /*
     Stata internal constants representing variable type information; 
     these were kindly provided by Akio:
    111 type
    Type:   b   i   l   f   d (byte, int, long, float, double)
    byte:  -5  -4  -3  -2  -1 (signed byte = java's byte type)
    byte: 251 252 253 254 255 (unsigned byte)
    HEX:  FB  FC  FD  FE  FF
            
    105 type(type chars correspond to their hex/decimal expressions
    Type:   b   i   l   f   d (byte, int, long, float, double)
    byte:  98 105 108 102 100 (signed byte = java's byte type)
    byte:  98 105 108 102 100 (unsigned byte)
    HEX:  62  69  6C  66  64
     */
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("type_offset_table:\n" + typeOffsetTable);
    }

    bytes_per_row = 0;

    for (int i = 0; i < typeList.length; i++) {
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine(i + "-th value=" + typeList[i]);
        }

        /*
         * How Stata types correspond to the DVN types: 
         * "Byte", "Integer" and "Long" become Numeric, Discrete (unless date value); 
         * "Float" and "Double" become Numeric, Continuous (unless date value);
         * "String" becomes String;
         * Date/time values stored as numeric types above, are converted into 
         * Strings.
         * -- L.A. 4.0
         */

        if (byteLengthTable.containsKey(typeList[i])) {
            // One of the known numeric types.
            bytes_per_row += byteLengthTable.get(typeList[i]);
            variableTypes[i] = variableTypeTable.get(typeList[i]);
            String typeLabel = variableTypes[i];

            if (typeLabel != null) {
                dataTable.getDataVariables().get(i).setTypeNumeric();
                if (typeLabel.equals("Byte") || typeLabel.equals("Integer") || typeLabel.equals("Long")) {
                    // these are treated as discrete:
                    dataTable.getDataVariables().get(i).setIntervalDiscrete();

                } else if (typeLabel.equals("Float") || typeLabel.equals("Double")) {
                    // these are treated as continuous:
                    dataTable.getDataVariables().get(i).setIntervalContinuous();

                } else {
                    throw new IOException("Unrecognized type label: " + typeLabel
                            + " for Stata type value byte " + typeList[i] + ".");
                }
            } else {
                throw new IOException(
                        "No entry in the known types table for Stata type value byte " + typeList[i] + ".");
            }
        } else {
            // Not a known numeric type: must be a string type, whose length
            // is the type value minus the "STRING" offset.
            if (releaseNumber < 111) {
                // pre-111 string type
                int stringType = 256 + typeList[i];
                if (stringType >= typeOffsetTable.get("STRING")) {
                    int string_var_length = stringType - typeOffsetTable.get("STRING");
                    if (dbgLog.isLoggable(Level.FINE)) {
                        dbgLog.fine("string_var_length=" + string_var_length);
                    }
                    bytes_per_row += string_var_length;

                    variableTypes[i] = "String";
                    dataTable.getDataVariables().get(i).setTypeCharacter();
                    dataTable.getDataVariables().get(i).setIntervalDiscrete();
                    StringLengthTable.put(i, string_var_length);

                } else {
                    throw new IOException("unknown variable type was detected: reading errors?");
                }
            } else if (releaseNumber >= 111) {
                // post-111 string type
                if (dbgLog.isLoggable(Level.FINE)) {
                    dbgLog.fine("DTA reader: typeList[" + i + "]=" + typeList[i]);
                }

                // if the size of strXXX type is less than 128,
                // the value of typeList[i] will be equal to that;
                // if however it is >= 128, typeList[i] = (size - 256)
                // i.e. it'll be a negative value:

                int stringType = ((typeList[i] > 0) && (typeList[i] <= 127)) ? typeList[i] : 256 + typeList[i];

                if (stringType >= typeOffsetTable.get("STRING")) {
                    int string_var_length = stringType - typeOffsetTable.get("STRING");
                    if (dbgLog.isLoggable(Level.FINE)) {
                        dbgLog.fine("DTA reader: string_var_length=" + string_var_length);
                    }
                    bytes_per_row += string_var_length;

                    variableTypes[i] = "String";
                    dataTable.getDataVariables().get(i).setTypeCharacter();
                    dataTable.getDataVariables().get(i).setIntervalDiscrete();
                    StringLengthTable.put(i, string_var_length);

                } else {
                    throw new IOException("unknown variable type was detected: reading errors?");
                }
            } else {
                throw new IOException("uknown release number ");
            }

        }
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine(i + "=th\t sum=" + bytes_per_row);
        }
    }
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("bytes_per_row(final)=" + bytes_per_row);
        dbgLog.fine("variableTypes:\n" + Arrays.deepToString(variableTypes));
        dbgLog.fine("StringLengthTable=" + StringLengthTable);
    }

}

From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.rdata.RDATAFileReader.java

/**
 * Get Variable Type List
 * 
 * Categorize the columns of a data-set according to data-type. Returns a list
 * of integers corresponding to: (-1) String (0) Integer (1) Double-precision.
 * The numbers do not directly correspond with anything used by UNF5Util,
 * however this convention is seen throughout the DVN data-file readers.
 * 
 * This function essentially matches R data-types with those understood by
 * DVN:
 * * integer, logical => "Integer"
 * * numeric (non-integer), double => "Double"
 * * Date, POSIXct, POSIXlt, POSIXt => date/time, reported as "String" (-1)
 * * factor and everything else => "String"
 * 
 * As a side effect this method re-creates {@code mFormatTable}, appends to
 * {@code mPrintFormatList}, {@code mPrintFormatNameTable} and
 * {@code mFormatCategoryTable}, and stores the resulting type and format
 * information in {@code smd}.
 * 
 * @param dataTypes an array of strings where index corresponds to data-set
 * column and string corresponds to the class of the R-object.
 * @return the "minimal" type list: one entry per element of
 * {@code dataTypes}, each being -1, 0 or 1 as described above
 */
private List<Integer> getVariableTypeList(String[] dataTypes) {
    /* 
     * TODO: 
     * 
     * Clean up this code; for example, the VariableMetaData variable "columnData"
     * is created below, but never saved or used. A vector of VariableMetaData 
     * values actually gets created somewhere else in the code of the reader, and those 
     * are the values that could be used elsewhere. Need to pick the one we want 
     * to use and remove the other one - for clarity. 
     * 
     * The whole setup with the "minimalTypeList" and "normalTypeList" is 
     * kinda confusing. One is used for the UNF and stats, the other one for 
     * metadata processing; which is ok. But then it is actually the "normal" 
     * one that is used for the "minimal" inside the SDIOMetadata object... 
     * Just renaming these to something that's more intuitive - types_for_UNF vs. 
     * types_for_METADATA - should be enough. 
     * 
     * --L.A.
     */

    mFormatTable = new int[mVarQuantity];

    List<Integer> minimalTypeList = new ArrayList<Integer>(), normalTypeList = new ArrayList<Integer>();

    // Indices of the columns holding double-precision (decimal) values.
    Set<Integer> decimalVariableSet = new HashSet<Integer>();

    // Index of the column currently being categorized.
    int k = 0;

    for (String type : dataTypes) {
        VariableMetaData columnMetaData;

        String variableName = variableNameList.get(k);

        // Convention is that integer is zero, right?
        if (type.equals("integer")) {
            minimalTypeList.add(0);
            normalTypeList.add(0);
            mFormatTable[k] = FORMAT_INTEGER;
            mPrintFormatList.add(1);
            // mPrintFormatNameTable.put(variableName, "N");

            columnMetaData = new VariableMetaData(1);
        }

        // Double-precision data-types
        else if (type.equals("numeric") || type.equals("double")) {
            LOG.fine("RDATAfilereader: getVariableTypeList: double variable;");
            minimalTypeList.add(1);
            normalTypeList.add(0);
            decimalVariableSet.add(k);
            mFormatTable[k] = FORMAT_NUMERIC;
            mPrintFormatList.add(1);

            columnMetaData = new VariableMetaData(1);
        }

        // If date
        else if (type.equals("Date")) {
            minimalTypeList.add(-1);
            normalTypeList.add(1);

            mFormatTable[k] = FORMAT_DATE;

            mPrintFormatList.add(0);
            mPrintFormatNameTable.put(variableName, "DATE10");
            mFormatCategoryTable.put(variableName, "date");

            columnMetaData = new VariableMetaData(0);

            LOG.fine("date variable detected. format: " + FORMAT_DATE);
        }

        // Date-time classes
        else if (type.equals("POSIXct") || type.equals("POSIXlt") || type.equals("POSIXt")) {
            minimalTypeList.add(-1);
            normalTypeList.add(1);

            mFormatTable[k] = FORMAT_DATETIME;

            mPrintFormatList.add(0);
            mPrintFormatNameTable.put(variableName, "DATETIME23.3");
            mFormatCategoryTable.put(variableName, "time");

            columnMetaData = new VariableMetaData(0);

            LOG.fine("POSIXt variable detected. format: " + FORMAT_DATETIME);
        }

        else if (type.equals("factor")) {
            /* 
             * This is the counter-intuitive part: in R, factors always have 
             * internal integer values and character labels. However, we will 
             * always treat them as character/string variables, i.e. on the DVN
             * side they will be ingested as string-type categorical variables 
             * (with both the "value" and the "label" being the same string - the 
             * R factor label). Yes, this means we are dropping the numeric value
             * completely. Why not do what we do in SPSS, i.e. use the numeric for 
             * the value (and the TAB file entry)? - well, this is in fact a very 
             * different case: in SPSS, a researcher creating a categorical variable 
             * with numeric values would be hand-picking these numeric variables; 
             * so we assume that the chosen values are in fact meaningful. If they 
             * had some sort of a reason to assign 0 = "Male" and 7 = "Female", we 
             * assume that they wanted to do this. So we use the numeric codes for 
             * storage in the TAB file and for calculation of the UNF. In R however, 
             * the user has no control over the internal numeric codes; they are 
             * always created automatically and are in fact considered meaningless. 
             * So we are going to assume that it is the actual values of the labels 
             * that are meaningful. 
             *  -- L.A. 
             * 
             */
            minimalTypeList.add(-1);
            normalTypeList.add(1);
            mFormatTable[k] = FORMAT_STRING;
            mPrintFormatList.add(0);
            mPrintFormatNameTable.put(variableName, "other");
            mFormatCategoryTable.put(variableName, "other");

            columnMetaData = new VariableMetaData(0);
        } else if (type.equals("logical")) {
            // Logicals are categorized exactly like integers here.
            minimalTypeList.add(0);
            normalTypeList.add(0);
            mFormatTable[k] = FORMAT_INTEGER;
            mPrintFormatList.add(1);
            // mPrintFormatNameTable.put(variableName, "N");

            columnMetaData = new VariableMetaData(1);
            columnMetaData.setBoolean(true);
        } else {
            // Everything else is a string
            minimalTypeList.add(-1);
            normalTypeList.add(1);
            mFormatTable[k] = FORMAT_STRING;
            mPrintFormatList.add(0);
            mPrintFormatNameTable.put(variableName, "other");
            mFormatCategoryTable.put(variableName, "other");

            columnMetaData = new VariableMetaData(0);
        }

        k++;
    }

    // NOTE: it is the "normal" list that is stored as the "minimal" type
    // list in the metadata object (see the TODO at the top of this method).
    smd.setVariableTypeMinimal(
            ArrayUtils.toPrimitive(normalTypeList.toArray(new Integer[normalTypeList.size()])));
    smd.setDecimalVariables(decimalVariableSet);
    smd.setVariableStorageType(null);

    smd.setVariableFormat(mPrintFormatList);
    smd.setVariableFormatName(mPrintFormatNameTable);
    smd.setVariableFormatCategory(mFormatCategoryTable);

    LOG.fine("minimalTypeList =    " + Arrays.deepToString(minimalTypeList.toArray()));
    LOG.fine("normalTypeList =     " + Arrays.deepToString(normalTypeList.toArray()));
    LOG.fine("decimalVariableSet = " + Arrays.deepToString(decimalVariableSet.toArray()));

    LOG.fine("mPrintFormatList =      " + mPrintFormatList);
    LOG.fine("mPrintFormatNameTable = " + mPrintFormatNameTable);
    LOG.fine("mFormatCategoryTable =  " + mFormatCategoryTable);

    // mFormatTable is an int[]: concatenating it directly would log only its
    // identity string (e.g. "[I@1b6d3586"), not the format codes.
    LOG.fine("mFormatTable = " + Arrays.toString(mFormatTable));

    // Return the variable type list
    return minimalTypeList;
}

From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.rdata.RDATAFileReader.java

/**
  * Create UNF from Tabular File
  * This method iterates through each column of the supplied data table and
  * invoked the 
  * @param DataTable table a rectangular data table
  * @return void
  */
private void createUNF(DataTable table) throws IOException {
    List<Integer> variableTypeList = getVariableTypeList(mDataTypes);
    String[] dateFormats = new String[mCaseQuantity];
    String[] unfValues = new String[mVarQuantity];
    String fileUNFvalue = null;

    // Set variable types
    // smd.setVariableTypeMinimal(ArrayUtils.toPrimitive(variableTypeList.toArray(new Integer[variableTypeList.size()])));

    int[] x = ArrayUtils.toPrimitive(variableTypeList.toArray(new Integer[variableTypeList.size()]));

    for (int k = 0; k < mVarQuantity; k++) {
        String unfValue, name = variableNameList.get(k);
        int varType = variableTypeList.get(k);

        Object[] varData = table.getData()[k];

        LOG.fine(String.format("RDATAFileReader: Column \"%s\" = %s", name, Arrays.deepToString(varData)));

        try {
            switch (varType) {
            case 0:
                Long[] integerEntries = new Long[varData.length];

                LOG.fine(k + ": " + name + " is numeric (integer)");

                if (smd.isBooleanVariable()[k]) {
                    // This is not a regular integer - but a boolean!
                    LOG.fine(k + ": " + name + " is NOT a simple integer - it's a logical (boolean)!");
                    Boolean[] booleanEntries = new Boolean[varData.length];
                    for (int i = 0; i < varData.length; i++) {
                        if (varData[i] == null || varData[i].equals("")) {
                            // Missing Value: 
                            booleanEntries[i] = null;
                        } else if (((String) varData[i]).equals("0")) {
                            booleanEntries[i] = false;
                        } else if (((String) varData[i]).equals("1")) {
                            booleanEntries[i] = true;
                        } else {
                            // Treat it as a missing value? 
                            booleanEntries[i] = null;
                            // TODO: 
                            // Should we throw an exception here instead? 
                        }

                        // We'll also need the integer values, to calculate
                        // the summary statistics: 
                        try {
                            integerEntries[i] = new Long((String) varData[i]);
                        } catch (Exception ex) {
                            integerEntries[i] = null;
                        }
                    }

                    unfValue = UNF5Util.calculateUNF(booleanEntries);
                    // TODO: 
                    // we've never calculated UNFs for Booleans before - 
                    // need to QA and verify that the values produced are correct.
                    // -- L.A.

                } else {
                    // Regular integer;
                    // Treat it as an array of Longs:
                    LOG.fine(k + ": " + name + " is a simple integer.");

                    for (int i = 0; i < varData.length; i++) {
                        try {
                            integerEntries[i] = new Long((String) varData[i]);
                        } catch (Exception ex) {
                            integerEntries[i] = null;
                        }
                    }

                    unfValue = UNF5Util.calculateUNF(integerEntries);

                    // UNF5Util.cal
                }

                // Summary/category statistics
                smd.getSummaryStatisticsTable().put(k,
                        ArrayUtils.toObject(StatHelper.calculateSummaryStatistics(integerEntries)));
                Map<String, Integer> catStat = StatHelper.calculateCategoryStatistics(integerEntries);
                smd.getCategoryStatisticsTable().put(variableNameList.get(k), catStat);
                smd.getNullValueCounts().put(variableNameList.get(k),
                        StatHelper.countNullValues(integerEntries));

                break;

            // If double
            case 1:
                LOG.fine(k + ": " + name + " is numeric (double)");
                // Convert array of Strings to array of Doubles
                Double[] doubleEntries = new Double[varData.length];

                for (int i = 0; i < varData.length; i++) {
                    try {
                        // Check for the special case of "NaN" - this is the R and DVN
                        // notation for the "Not A Number" value:
                        if (varData[i] != null && ((String) varData[i]).equals("NaN")) {
                            doubleEntries[i] = Double.NaN;
                            // "Inf" is another special case, notation for infinity, 
                            // positive and negative:
                        } else if (varData[i] != null && (((String) varData[i]).equals("Inf")
                                || ((String) varData[i]).equals("+Inf"))) {

                            doubleEntries[i] = Double.POSITIVE_INFINITY;
                        } else if (varData[i] != null && ((String) varData[i]).equals("-Inf")) {

                            doubleEntries[i] = Double.NEGATIVE_INFINITY;
                        } else {
                            // Missing Values don't need to be treated separately; these 
                            // are represented as empty strings in the TAB file; so 
                            // attempting to create a Double object from one will 
                            // throw an exception - which we are going to intercept 
                            // below. For the UNF and Summary Stats purposes, missing
                            // values are represented as NULLs. 
                            doubleEntries[i] = new Double((String) varData[i]);
                        }
                    } catch (Exception ex) {
                        LOG.fine(k + ": " + name + " dropping value " + (String) varData[i] + " (" + i
                                + "); replacing with null");
                        doubleEntries[i] = null;
                    }
                }

                LOG.fine("sumstat:double case=" + Arrays.deepToString(ArrayUtils
                        .toObject(StatHelper.calculateSummaryStatisticsContDistSample(doubleEntries))));

                // Save summary statistics:
                smd.getSummaryStatisticsTable().put(k, ArrayUtils
                        .toObject(StatHelper.calculateSummaryStatisticsContDistSample(doubleEntries)));

                unfValue = UNF5Util.calculateUNF(doubleEntries);

                break;

            case -1:
                LOG.fine(k + ": " + name + " is string");

                String[] stringEntries = new String[varData.length];//Arrays.asList(varData).toArray(new String[varData.length]);

                LOG.fine("string array passed to calculateUNF: " + Arrays.deepToString(stringEntries));

                //
                if (mFormatTable[k] == FORMAT_DATE || mFormatTable[k] == FORMAT_DATETIME) {
                    DateFormatter dateFormatter = new DateFormatter();

                    dateFormatter.setDateFormats(DATE_FORMATS);
                    dateFormatter.setTimeFormats(TIME_FORMATS);

                    for (int i = 0; i < varData.length; i++) {
                        DateWithFormatter entryDateWithFormat;

                        // If data is missing, treat this entry as just that - 
                        // a missing value. Just like for all the other data types, 
                        // this is represented by a null:
                        if (dateFormats[i] != null && (varData[i].equals("") || varData[i].equals(" "))) {
                            stringEntries[i] = dateFormats[i] = null;
                        } else {
                            entryDateWithFormat = dateFormatter.getDateWithFormat((String) varData[i]);
                            if (entryDateWithFormat == null) {
                                LOG.fine("ATTENTION: the supplied date/time string could not be parsed ("
                                        + (String) varData[i]);
                                throw new IOException(
                                        "Could not parse supplied date/time string: " + (String) varData[i]);
                            }
                            // Otherwise get the pattern
                            // entryDateWithFormat = dateFormatter.getDateWithFormat(stringEntries[i]);
                            stringEntries[i] = (String) varData[i];
                            dateFormats[i] = entryDateWithFormat.getFormatter().toPattern();

                        }
                    }

                    // Compute UNF
                    try {
                        LOG.fine("RDATAFileReader: strdata = " + Arrays.deepToString(stringEntries));
                        LOG.fine("RDATAFileReader: dateFormats = " + Arrays.deepToString(dateFormats));

                        unfValue = UNF5Util.calculateUNF(stringEntries, dateFormats);
                    } catch (Exception ex) {
                        LOG.warning("RDATAFileReader: UNF for variable " + name + " could not be computed!");
                        //unfValue = UNF5Util.calculateUNF(stringEntries);
                        //ex.printStackTrace();
                        throw ex;
                    }
                } else {
                    for (int i = 0; i < varData.length; i++) {
                        if (varData[i] == null) {
                            // Missing Value
                            stringEntries[i] = null;
                        } else {
                            stringEntries[i] = (String) varData[i];
                        }
                    }

                    unfValue = UNF5Util.calculateUNF(stringEntries);
                }

                smd.getSummaryStatisticsTable().put(k, StatHelper.calculateSummaryStatistics(stringEntries));
                Map<String, Integer> StrCatStat = StatHelper.calculateCategoryStatistics(stringEntries);
                smd.getCategoryStatisticsTable().put(variableNameList.get(k), StrCatStat);
                smd.getNullValueCounts().put(variableNameList.get(k),
                        StatHelper.countNullValues(stringEntries));

                break;

            default:
                unfValue = null;

            }

            //LOG.fine(String.format("RDATAFileReader: Column \"%s\" (UNF) = %s", name, unfValue));

            // Store UNF value
            unfValues[k] = unfValue;
        } catch (Exception ex) {
            LOG.fine("Exception caught while calculating UNF! " + ex.getMessage());
            ex.printStackTrace();
            throw new IOException("Exception caught while calculating UNF! " + ex.getMessage());
        }
        LOG.fine(String.format("RDATAFileReader: Column \"%s\" (UNF) = %s", name, unfValues[k]));

    }

    try {
        fileUNFvalue = UNF5Util.calculateUNF(unfValues);
    } catch (Exception ex) {
        ex.printStackTrace();
        LOG.fine("Exception caught while calculating the combined UNF for the data set! " + ex.getMessage());
        throw new IOException(
                "Exception caught while calculating the combined UNF for the data set! " + ex.getMessage());
    }
    mCsvDataTable.setUnf(unfValues);
    mCsvDataTable.setFileUnf(fileUNFvalue);

    // Set meta-data to make it look like a SAV file
    // smd.setVariableStorageType(null);
    // smd.setDecimalVariables(mDecimalVariableSet);

    boolean[] b = smd.isContinuousVariable();

    for (int k = 0; k < b.length; k++) {
        String s = b[k] ? "True" : "False";
        LOG.fine(k + " = " + s);
    }

    smd.setVariableUNF(unfValues);
    smd.getFileInformation().put("fileUNF", fileUNFvalue);
}

From source file:org.broadinstitute.gatk.utils.commandline.ParsingEngine.java

/**
 * Builds one message line per argument match, each reporting that the
 * argument has too many values and listing those values.
 *
 * @param arguments the argument matches to report
 * @return the concatenated lines, each starting with a line separator;
 *         empty when {@code arguments} is empty
 */
private static String formatArguments(Collection<ArgumentMatch> arguments) {
    StringBuilder message = new StringBuilder();
    for (ArgumentMatch match : arguments) {
        String line = String.format("%nArgument '%s' has too many values: %s.", match.label,
                Arrays.deepToString(match.values().toArray()));
        message.append(line);
    }
    return message.toString();
}

From source file:jeplus.JEPlusProject.java

/**
 * Builds a job list from per-parameter samples: the sample vector of every
 * parameter (index 1 and up) is shuffled independently, then job {@code i}
 * takes the {@code i}-th value of each shuffled vector.
 *
 * @param LHSsize   number of jobs to create
 * @param randomsrc random source used for sampling and shuffling; when
 *                  {@code null}, {@code RandomSource.getRandomGenerator()}
 *                  is used instead
 * @return one row per job, where element 0 is the job id
 *         ({@code "LHS-%06d"}) and the remaining elements are the sampled
 *         values; {@code null} when {@code ParamTree} is {@code null}
 */
public String[][] getLHSJobList(int LHSsize, Random randomsrc) {
    Random rng = (randomsrc != null) ? randomsrc : RandomSource.getRandomGenerator();
    String[][] jobs = new String[LHSsize][];

    // Without a parameter tree there is nothing to sample from.
    if (ParamTree == null) {
        return null;
    }

    // One sample vector per parameter (inc. idf and weather).
    String[][] samples = getSampleInEqualProbSegments(LHSsize, rng);
    logger.debug(Arrays.deepToString(samples));

    int columns = samples.length;

    // Shuffle each parameter's vector in place (Arrays.asList is a view
    // over the array). Index 0 is left untouched; that slot of every job
    // row receives the job id below.
    for (int p = 1; p < columns; p++) {
        Collections.shuffle(Arrays.asList(samples[p]), rng);
    }

    // Job i takes the i-th value of every (shuffled) parameter vector.
    for (int job = 0; job < LHSsize; job++) {
        String[] row = new String[columns];
        row[0] = String.format("LHS-%06d", job); // Job id
        for (int p = 1; p < columns; p++) {
            row[p] = samples[p][job];
        }
        jobs[job] = row;
    }
    return jobs;
}

From source file:org.apache.sqoop.connector.idf.TestCSVIntermediateDataFormat.java

/**
 * Sets CSV text holding a nested array column and a text column, then
 * checks the object data read back: the first column must be a two-element
 * array whose string form matches the nested arrays, and the second column
 * must be the text value.
 */
@Test
public void testArrayOfObjectsWithCSVTextInObjectArrayOut() {
    Schema schema = new Schema("test");
    schema.addColumn(new org.apache.sqoop.schema.type.Array("1",
            new org.apache.sqoop.schema.type.Array("array", new FixedPoint("ft", 2L, false))));
    schema.addColumn(new org.apache.sqoop.schema.type.Text("2"));
    dataFormat = new CSVIntermediateDataFormat(schema);

    // Nested arrays that the first column is compared against.
    Object[] givenArrayOne = { 11, 12 };
    Object[] givenArrayTwo = { 14, 15 };
    Object[] arrayOfArrays = { givenArrayOne, givenArrayTwo };

    dataFormat.setCSVTextData("'[\"[11, 12]\",\"[14, 15]\"]','text'");

    // The value read back is the *actual* result of the test.
    Object[] actualArray = (Object[]) dataFormat.getObjectData()[0];
    assertEquals(2, actualArray.length);
    assertEquals(Arrays.deepToString(arrayOfArrays), Arrays.toString(actualArray));
    assertEquals("text", dataFormat.getObjectData()[1]);
}

From source file:com.yunguchang.data.ApplicationRepository.java

/**
 * Configures the given subquery to select the schedule/car rows that
 * overlap the time window covered by the supplied applications.
 *
 * The window runs from the earliest begin time to the latest end time of
 * all applications. A row matches when its status is
 * {@code ScheduleStatus.AWAITING} and its schedule either starts inside the
 * window, ends inside the window, or starts before and ends after it. The
 * row must also belong to {@code carRoot} and, when {@code driverRoot} is
 * not {@code null}, to that driver as well.
 *
 * @param overlapScheduleCarSubQuery the subquery to configure
 * @param applicationIds             ids of the applications defining the window
 * @param carRoot                    the car the matching rows must reference
 * @param driverRoot                 the driver the matching rows must reference,
 *                                   or {@code null} to match on the car only
 * @param cb                         criteria builder used to build the predicates
 * @param principalExt               principal used to look the applications up
 * @return {@code overlapScheduleCarSubQuery}, with selection and restriction set
 */
private Subquery<TBusScheduleCarEntity> applyOverlapScheduleCarSubquery(
        Subquery<TBusScheduleCarEntity> overlapScheduleCarSubQuery, String[] applicationIds,
        Root<TAzCarinfoEntity> carRoot, Root<TRsDriverinfoEntity> driverRoot, CriteriaBuilder cb,
        PrincipalExt principalExt) {
    Root<TBusScheduleCarEntity> subScheduleCarRoot = overlapScheduleCarSubQuery
            .from(TBusScheduleCarEntity.class);
    overlapScheduleCarSubQuery.select(subScheduleCarRoot);

    Path<DateTime> scheduleStartTime = subScheduleCarRoot.get(TBusScheduleCarEntity_.schedule)
            .get(TBusScheduleRelaEntity_.starttime);
    Path<DateTime> scheduleEndTime = subScheduleCarRoot.get(TBusScheduleCarEntity_.schedule)
            .get(TBusScheduleRelaEntity_.endtime);

    // Widen the window to [earliest begin, latest end] over all applications.
    DateTime applicationStartTime = null;
    DateTime applicationEndTime = null;
    for (String applicationId : applicationIds) {
        TBusApplyinfoEntity applicationEntity = getApplicationById(applicationId, principalExt);
        if (applicationEntity == null) {
            throw logger.entityNotFound(TBusApplyinfoEntity.class, applicationId);
        }
        DateTime begin = applicationEntity.getBegintime();
        DateTime end = applicationEntity.getEndtime();
        // The start must be compared with the current *start* of the window;
        // comparing with the end would move the start to any begin time that
        // merely precedes the current end, even a later one.
        if (applicationStartTime == null || applicationStartTime.isAfter(begin)) {
            applicationStartTime = begin;
        }
        if (applicationEndTime == null || applicationEndTime.isBefore(end)) {
            applicationEndTime = end;
        }
    }

    // No applications supplied, or an application without begin/end time.
    if (applicationStartTime == null || applicationEndTime == null) {
        throw logger.invalidApplication(Arrays.deepToString(applicationIds));
    }

    // The three ways a schedule can overlap the window.
    Predicate startsInsideWindow = cb.between(scheduleStartTime, applicationStartTime, applicationEndTime);
    Predicate endsInsideWindow = cb.between(scheduleEndTime, applicationStartTime, applicationEndTime);
    Predicate spansWindow = cb.and(cb.lessThan(scheduleStartTime, applicationStartTime),
            cb.greaterThan(scheduleEndTime, applicationEndTime));

    Predicate predicate = cb.and(cb.or(startsInsideWindow, endsInsideWindow, spansWindow),
            subScheduleCarRoot.get(TBusScheduleCarEntity_.status).in(ScheduleStatus.AWAITING.id()));
    if (driverRoot != null) {
        predicate = cb.and(predicate,
                cb.and(cb.equal(subScheduleCarRoot.get(TBusScheduleCarEntity_.car), carRoot),
                        cb.equal(subScheduleCarRoot.get(TBusScheduleCarEntity_.driver), driverRoot)));
    } else {
        predicate = cb.and(predicate, cb.equal(subScheduleCarRoot.get(TBusScheduleCarEntity_.car), carRoot));
    }

    overlapScheduleCarSubQuery.where(predicate);
    return overlapScheduleCarSubQuery;
}

From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.spss.SPSSFileReader.java

/**
 * Calculates the UNF of one variable (column) and stores the variable's
 * summary statistics (and, for integers and strings, its category
 * statistics) in {@code smd}.
 *
 * @param varData          raw cell values of the variable, expected to be Strings
 * @param dateFormats      per-row date/time patterns, or {@code null} when the
 *                         variable is not a date/time; only read for string
 *                         variables. Entries that correspond to missing
 *                         values are reset to {@code null} in place
 * @param variableType     0 for integer, 1 for double, -1 for string
 * @param unfVersionNumber UNF version number; only logged here
 * @param variablePosition index of the variable; used as the key of the
 *                         summary statistics table and to look up the
 *                         variable name
 * @return the UNF value calculated for the variable
 * @throws IllegalArgumentException if {@code variableType} is not 0, 1 or -1
 */
private String getUNF(Object[] varData, String[] dateFormats, int variableType, String unfVersionNumber,
        int variablePosition)
        throws NumberFormatException, UnfException, IOException, NoSuchAlgorithmException {
    String unfValue = null;

    dbgLog.fine("variableType=" + variableType);
    dbgLog.finer("unfVersionNumber=" + unfVersionNumber);
    dbgLog.fine("variablePosition=" + variablePosition);
    dbgLog.fine("variableName=" + variableNameList.get(variablePosition));

    switch (variableType) {
    case 0:
        // Integer (Long):

        dbgLog.fine("Integer case");

        // Convert array of Strings to array of Longs
        Long[] ldata = new Long[varData.length];
        for (int i = 0; i < varData.length; i++) {
            try {
                ldata[i] = Long.valueOf((String) varData[i]);
            } catch (RuntimeException ex) {
                // Best effort: missing (null/empty) and unparseable values
                // are represented as nulls.
                ldata[i] = null;
            }
        }
        unfValue = UNF5Util.calculateUNF(ldata);
        dbgLog.finer("integer:unfValue=" + unfValue);

        smd.getSummaryStatisticsTable().put(variablePosition,
                ArrayUtils.toObject(StatHelper.calculateSummaryStatistics(ldata)));

        Map<String, Integer> catStat = StatHelper.calculateCategoryStatistics(ldata);
        smd.getCategoryStatisticsTable().put(variableNameList.get(variablePosition), catStat);

        break;

    case 1:
        // Double:

        dbgLog.finer("double case");

        // Convert array of Strings to array of Doubles
        Double[] ddata = new Double[varData.length];
        for (int i = 0; i < varData.length; i++) {
            try {
                ddata[i] = Double.valueOf((String) varData[i]);
            } catch (RuntimeException ex) {
                // Best effort: missing (null/empty) and unparseable values
                // are represented as nulls.
                ddata[i] = null;
            }
        }
        unfValue = UNF5Util.calculateUNF(ddata);
        dbgLog.finer("double:unfValue=" + unfValue);
        smd.getSummaryStatisticsTable().put(variablePosition,
                ArrayUtils.toObject(StatHelper.calculateSummaryStatisticsContDistSample(ddata)));

        break;
    case -1:
        // String:
        //
        // i.e., this is something *stored* as string; it may still be
        // a more complex data type than just a string of characters.
        // Namely, it can be some date or time type that we support.
        // These should be handled differently when calculating the
        // UNFs.

        dbgLog.finer("string case");

        String[] strdata = Arrays.asList(varData).toArray(new String[varData.length]);
        dbgLog.finer("string array passed to calculateUNF: " + Arrays.deepToString(strdata));

        if (dateFormats != null) {

            for (int i = 0; i < varData.length; i++) {
                // A date/time entry whose value is null, empty or a single
                // blank is a missing value: clear both the value and its
                // format. The null check keeps a null cell from causing a
                // NullPointerException here.
                if (dateFormats[i] != null
                        && (strdata[i] == null || strdata[i].equals("") || strdata[i].equals(" "))) {
                    strdata[i] = null;
                    dateFormats[i] = null;
                }
            }

            unfValue = UNF5Util.calculateUNF(strdata, dateFormats);
        } else {
            unfValue = UNF5Util.calculateUNF(strdata);
        }

        dbgLog.finer("string:unfValue=" + unfValue);

        smd.getSummaryStatisticsTable().put(variablePosition, StatHelper.calculateSummaryStatistics(strdata));

        Map<String, Integer> StrCatStat = StatHelper.calculateCategoryStatistics(strdata);

        smd.getCategoryStatisticsTable().put(variableNameList.get(variablePosition), StrCatStat);

        break;
    default:
        dbgLog.fine("unknown variable type found");
        String errorMessage = "unknow variable Type found at varData section";
        throw new IllegalArgumentException(errorMessage);

    } // switch

    dbgLog.fine("unfvalue(last)=" + unfValue);
    return unfValue;
}

From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.ddi.DDIFileReader.java

/**
 * Calculates the UNF of one variable (column) and stores the variable's
 * summary statistics (and, for integers and strings, its category
 * statistics) in {@code smd}.
 *
 * @param varData          raw cell values of the variable, expected to be Strings
 * @param dateFormats      per-row date/time patterns, or {@code null} when the
 *                         variable is not a date/time; only read for string
 *                         variables. Entries that correspond to missing
 *                         values are reset to {@code null} in place
 * @param variableType     0 for integer, 1 for double, -1 for string
 * @param unfVersionNumber UNF version number; only logged here
 * @param variablePosition index of the variable; used as the key of the
 *                         summary statistics table and to look up the
 *                         variable name
 * @return the UNF value calculated for the variable
 * @throws IllegalArgumentException if {@code variableType} is not 0, 1 or -1
 */
private String getUNF(Object[] varData, String[] dateFormats, int variableType, String unfVersionNumber,
        int variablePosition)
        throws NumberFormatException, UnfException, IOException, NoSuchAlgorithmException {
    String unfValue = null;

    dbgLog.fine("variableType=" + variableType);
    dbgLog.finer("unfVersionNumber=" + unfVersionNumber);
    dbgLog.fine("variablePosition=" + variablePosition);

    switch (variableType) {
    case 0:
        // Integer (Long):

        dbgLog.fine("Integer case");

        // Convert array of Strings to array of Longs
        Long[] ldata = new Long[varData.length];
        for (int i = 0; i < varData.length; i++) {
            try {
                ldata[i] = Long.valueOf((String) varData[i]);
            } catch (RuntimeException ex) {
                // Best effort: missing (null/empty) and unparseable values
                // are represented as nulls.
                ldata[i] = null;
            }
        }
        unfValue = UNF5Util.calculateUNF(ldata);
        dbgLog.finer("integer:unfValue=" + unfValue);

        smd.getSummaryStatisticsTable().put(variablePosition,
                ArrayUtils.toObject(StatHelper.calculateSummaryStatistics(ldata)));

        Map<String, Integer> catStat = StatHelper.calculateCategoryStatistics(ldata);
        smd.getCategoryStatisticsTable().put(variableNameList.get(variablePosition), catStat);

        break;

    case 1:
        // Double:

        dbgLog.finer("double case");

        // Convert array of Strings to array of Doubles
        Double[] ddata = new Double[varData.length];
        for (int i = 0; i < varData.length; i++) {
            try {
                ddata[i] = Double.valueOf((String) varData[i]);
            } catch (RuntimeException ex) {
                // Best effort: missing (null/empty) and unparseable values
                // are represented as nulls.
                ddata[i] = null;
            }
        }
        unfValue = UNF5Util.calculateUNF(ddata);
        dbgLog.finer("double:unfValue=" + unfValue);
        smd.getSummaryStatisticsTable().put(variablePosition,
                ArrayUtils.toObject(StatHelper.calculateSummaryStatisticsContDistSample(ddata)));

        break;
    case -1:
        // String:
        //
        // i.e., this is something *stored* as string; it may still be
        // a more complex data type than just a string of characters.
        // Namely, it can be some date or time type that we support.
        // These should be handled differently when calculating the
        // UNFs.

        dbgLog.finer("string case");

        String[] strdata = Arrays.asList(varData).toArray(new String[varData.length]);
        dbgLog.finer("string array passed to calculateUNF: " + Arrays.deepToString(strdata));

        if (dateFormats != null) {

            for (int i = 0; i < varData.length; i++) {
                // A date/time entry whose value is null, empty or a single
                // blank is a missing value: clear both the value and its
                // format. The null check keeps a null cell from causing a
                // NullPointerException here.
                if (dateFormats[i] != null
                        && (strdata[i] == null || strdata[i].equals("") || strdata[i].equals(" "))) {
                    strdata[i] = null;
                    dateFormats[i] = null;
                }
            }

            unfValue = UNF5Util.calculateUNF(strdata, dateFormats);
        } else {
            unfValue = UNF5Util.calculateUNF(strdata);
        }

        dbgLog.finer("string:unfValue=" + unfValue);

        smd.getSummaryStatisticsTable().put(variablePosition, StatHelper.calculateSummaryStatistics(strdata));

        Map<String, Integer> StrCatStat = StatHelper.calculateCategoryStatistics(strdata);

        smd.getCategoryStatisticsTable().put(variableNameList.get(variablePosition), StrCatStat);

        break;
    default:
        dbgLog.fine("unknown variable type found");
        String errorMessage = "unknow variable Type found at varData section";
        throw new IllegalArgumentException(errorMessage);

    } // switch

    dbgLog.fine("unfvalue(last)=" + unfValue);
    return unfValue;
}