Example usage for java.util Arrays deepToString

List of usage examples for java.util Arrays deepToString

Introduction

This page collects example usages of java.util.Arrays.deepToString.

Prototype

public static String deepToString(Object[] a) 

Source Link

Document

Returns a string representation of the "deep contents" of the specified array.

Usage

From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.dta.DTA117FileReader.java

/**
 * Reads the data section of the file and dumps it, row by row, into a
 * temporary tab-delimited file that is registered on {@code ingesteddata}.
 *
 * The output streams are managed with try-with-resources, so the temp file
 * handle is released even when decoding a row fails part-way through.
 *
 * @param reader the reader positioned at the opening tag of the data section
 * @throws IOException if a tag or value cannot be read, if a variable type
 *         is undefined or unknown, or if the number of bytes consumed for a
 *         row does not match the expected row width
 */
private void readData(DataReader reader) throws IOException {
    logger.fine("Data section; at offset " + reader.getByteOffset() + "; dta map offset: "
            + dtaMap.getOffset_data());
    logger.fine("readData(): start");
    reader.readOpeningTag(TAG_DATA);
    // TODO: 
    // check that we are at the right byte offset!

    int nvar = dataTable.getVarQuantity().intValue();
    int nobs = dataTable.getCaseQuantity().intValue();

    int[] variableByteLengths = getVariableByteLengths(variableTypes);
    int bytes_per_row = calculateBytesPerRow(variableByteLengths);

    logger.fine("data dimensions[observations x variables] = (" + nobs + "x" + nvar + ")");
    logger.fine("bytes per row=" + bytes_per_row + " bytes");
    logger.fine("variableTypes=" + Arrays.deepToString(variableTypes));

    // create a File object to save the tab-delimited data file
    File tabDelimitedDataFile = File.createTempFile("tempTabfile.", ".tab");

    // save the temp tab-delimited file in the return ingest object:
    ingesteddata.setTabDelimitedFile(tabDelimitedDataFile);

    // Both resources are closed (writer first, then the stream) on normal
    // completion AND when an exception escapes the loop below.
    try (FileOutputStream fileOutTab = new FileOutputStream(tabDelimitedDataFile);
            PrintWriter pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true)) {

        logger.fine("Beginning to read data stream.");

        for (int i = 0; i < nobs; i++) {
            // TODO: 
            // maybe intercept any potential exceptions here, and add more 
            // diagnostic info, before re-throwing...
            Object[] dataRow = readDataRowValues(reader, i, nvar, variableByteLengths, bytes_per_row);

            // Dump the row of data to the tab-delimited file:
            pwout.println(StringUtils.join(dataRow, "\t"));

            logger.fine("finished reading " + i + "-th row");
        }
    }

    reader.readClosingTag(TAG_DATA);
    logger.fine("DTA117 Ingest: readData(): end.");

}

/**
 * Reads one observation (row) from the data section and converts every
 * column into the value that goes into the tab-delimited file.
 *
 * @param reader the reader positioned at the first byte of the row
 * @param rowIndex zero-based index of the row, used in log and error messages
 *        and when comparing STRL (v,o) pairs
 * @param nvar number of variables (columns) in the row
 * @param variableByteLengths per-column byte lengths; consulted for the
 *        fixed-length string columns
 * @param bytes_per_row expected total number of bytes in one row
 * @return the decoded values, one entry per column
 * @throws IOException on an undefined or unknown variable type, or when the
 *         number of bytes consumed differs from {@code bytes_per_row}
 */
private Object[] readDataRowValues(DataReader reader, int rowIndex, int nvar, int[] variableByteLengths,
        int bytes_per_row) throws IOException {
    Object[] dataRow = new Object[nvar];

    // running count of the bytes consumed for this row; verified at the end
    int byte_offset = 0;
    for (int columnCounter = 0; columnCounter < nvar; columnCounter++) {

        String varType = variableTypes[columnCounter];

        // 4.0 Check if this is a time/date variable: 
        boolean isDateTimeDatum = false;
        // TODO: 
        // make sure the formats are properly set! -- use the old 
        // plugin as a model... 
        String formatCategory = dataTable.getDataVariables().get(columnCounter).getFormatCategory();
        if (formatCategory != null && (formatCategory.equals("time") || formatCategory.equals("date"))) {
            isDateTimeDatum = true;
        }

        // TODO: 
        // ditto
        String variableFormat = dateVariableFormats[columnCounter];

        if (varType == null || varType.equals("")) {
            throw new IOException("Undefined variable type encountered in readData()");
        }

        // TODO: 
        // double-check that the missing values constants are still correct!
        if (varType.equals("Byte")) {
            // (signed) Byte
            byte byte_datum = reader.readSignedByte();

            logger.fine(rowIndex + "-th row " + columnCounter + "=th column byte =" + byte_datum);
            if (byte_datum >= BYTE_MISSING_VALUE) {
                logger.fine(rowIndex + "-th row " + columnCounter + "=th column byte MV=" + byte_datum);
                dataRow[columnCounter] = MissingValueForTabDelimitedFile;
            } else {
                dataRow[columnCounter] = byte_datum;
                logger.fine(rowIndex + "-th row " + columnCounter + "-th column byte value=" + byte_datum);
            }

            byte_offset++;
        } else if (varType.equals("Integer")) {
            // stata-int: 2 bytes, read into a java short
            short short_datum = (short) reader.readShortSignedInteger();

            logger.fine(rowIndex + "-th row " + columnCounter + "=th column stata int =" + short_datum);

            if (short_datum >= INT_MISSIG_VALUE) {
                logger.fine(rowIndex + "-th row " + columnCounter + "=th column stata long missing value="
                        + short_datum);
                dataRow[columnCounter] = MissingValueForTabDelimitedFile;
            } else {

                if (isDateTimeDatum) {

                    DecodedDateTime ddt = decodeDateTimeData("short", variableFormat,
                            Short.toString(short_datum));
                    logger.fine(rowIndex + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format="
                            + ddt.format);
                    dataRow[columnCounter] = ddt.decodedDateTime;
                    dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);

                } else {
                    dataRow[columnCounter] = short_datum;
                    logger.fine(
                            rowIndex + "-th row " + columnCounter + "-th column \"integer\" value=" + short_datum);
                }
            }
            byte_offset += 2;
        } else if (varType.equals("Long")) {
            // stata-Long (= java's int: 4 byte), signed.

            int int_datum = reader.readSignedInteger();

            if (int_datum >= LONG_MISSING_VALUE) {
                dataRow[columnCounter] = MissingValueForTabDelimitedFile;
            } else {
                if (isDateTimeDatum) {
                    DecodedDateTime ddt = decodeDateTimeData("int", variableFormat,
                            Integer.toString(int_datum));
                    logger.fine(rowIndex + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format="
                            + ddt.format);
                    dataRow[columnCounter] = ddt.decodedDateTime;
                    dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);

                } else {
                    dataRow[columnCounter] = int_datum;
                    logger.fine(rowIndex + "-th row " + columnCounter + "-th column \"long\" value=" + int_datum);
                }

            }
            byte_offset += 4;
        } else if (varType.equals("Float")) {
            // STATA float 
            // same as Java float - 4-byte

            float float_datum = reader.readFloat();

            logger.fine(rowIndex + "-th row " + columnCounter + "=th column float =" + float_datum);
            if (FLOAT_MISSING_VALUE_SET.contains(float_datum)) {
                logger.fine(
                        rowIndex + "-th row " + columnCounter + "=th column float missing value=" + float_datum);
                dataRow[columnCounter] = MissingValueForTabDelimitedFile;

            } else {

                if (isDateTimeDatum) {
                    DecodedDateTime ddt = decodeDateTimeData("float", variableFormat,
                            doubleNumberFormatter.format(float_datum));
                    logger.fine(rowIndex + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format="
                            + ddt.format);
                    dataRow[columnCounter] = ddt.decodedDateTime;
                    dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);
                } else {
                    dataRow[columnCounter] = float_datum;
                    logger.fine(rowIndex + "-th row " + columnCounter + "=th column float value:" + float_datum);
                    // This may be temporary - but for now (as in, while I'm testing 
                    // 4.0 ingest against 3.* ingest, I need to be able to tell if a 
                    // floating point value was a single, or double float in the 
                    // original STATA file: -- L.A. Jul. 2014
                    dataTable.getDataVariables().get(columnCounter).setFormat("float");
                    // ?
                }

            }
            byte_offset += 4;
        } else if (varType.equals("Double")) {
            // STATA double
            // same as Java double - 8-byte

            double double_datum = reader.readDouble();

            if (DOUBLE_MISSING_VALUE_SET.contains(double_datum)) {
                logger.finer(
                        rowIndex + "-th row " + columnCounter + "=th column double missing value=" + double_datum);
                dataRow[columnCounter] = MissingValueForTabDelimitedFile;
            } else {

                if (isDateTimeDatum) {
                    DecodedDateTime ddt = decodeDateTimeData("double", variableFormat,
                            doubleNumberFormatter.format(double_datum));
                    logger.finer(rowIndex + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format="
                            + ddt.format);
                    dataRow[columnCounter] = ddt.decodedDateTime;
                    dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);
                } else {
                    logger.fine(rowIndex + "-th row " + columnCounter + "=th column double value:" + double_datum);

                    // the raw double is stored, not a formatted string
                    dataRow[columnCounter] = double_datum;
                }

            }
            byte_offset += 8;
        } else if (varType.matches("^STR[1-9][0-9]*")) {
            // String case
            int strVarLength = variableByteLengths[columnCounter];
            logger.fine(
                    rowIndex + "-th row " + columnCounter + "=th column is a string (" + strVarLength + " bytes)");
            // (old) TODO: 
            // is it the right thing to do, to default to "ISO-8859-1"?
            // (it may be; since there's no mechanism for specifying
            // alternative encodings in Stata, this may be their default;
            // it just needs to be verified. -- L.A. Jul. 2014)
            // ACTUALLY, in STATA13, it appears that STRF *MUST*
            // be limited to ASCII. Binary strings can be stored as 
            // STRLs. (Oct. 6 2014)

            String string_datum = reader.readString(strVarLength);
            // only the first 64 characters of long strings are logged
            if (string_datum.length() < 64) {
                logger.fine(rowIndex + "-th row " + columnCounter + "=th column string =" + string_datum);
            } else {
                logger.fine(rowIndex + "-th row " + columnCounter + "=th column string ="
                        + string_datum.substring(0, 64) + "... (truncated)");
            }
            if (string_datum.equals("")) {

                logger.fine(
                        rowIndex + "-th row " + columnCounter + "=th column string missing value=" + string_datum);

                // TODO: 
                /* Is this really a missing value case? 
                 * Or is it an honest empty string? 
                 * Is there such a thing as a missing value for a String in Stata?
                 * -- L.A. 4.0
                 */
                dataRow[columnCounter] = MissingValueForTabDelimitedFile;
            } else {
                /*
                 * Some special characters, like new lines and tabs need to 
                 * be escaped - otherwise they will break our TAB file 
                 * structure! 
                 */

                dataRow[columnCounter] = escapeCharacterString(string_datum);
            }
            byte_offset += strVarLength;
        } else if (varType.equals("STRL")) {
            logger.fine("STRL encountered.");

            if (cachedGSOs == null) {
                cachedGSOs = new LinkedHashMap<>();
            }

            // Reading the (v,o) pair: 
            long v = 0;
            long o = 0;
            String voPair = null;
            // first v:

            v = reader.readInteger();
            byte_offset += 4;

            // then o:

            o = reader.readInteger();
            byte_offset += 4;

            // create v,o pair; save, for now:

            voPair = v + "," + o;
            dataRow[columnCounter] = voPair;

            // TODO: 
            // Validate v and o? 
            // Making sure v <= varNum and o < numbObs; 
            // or, if o == numObs, v <= columnCounter; 
            // -- per the Stata 13 spec...

            // a pair that points at a cell other than this one is registered
            // in the cache (with an empty placeholder value)
            if (!(v == columnCounter + 1 && o == rowIndex + 1)) {
                if (!cachedGSOs.containsKey(voPair)) {
                    cachedGSOs.put(voPair, "");
                    // this means we need to cache this GSO, when 
                    // we read the STRLS section later on. 
                }
            }

        } else {
            logger.warning("unknown variable type found: " + varType);
            String errorMessage = "unknown variable type encounted when reading data section: " + varType;
            throw new IOException(errorMessage);

        }
    } // for (columnCounter)

    if (byte_offset != bytes_per_row) {
        throw new IOException("Unexpected number of bytes read for data row " + rowIndex + "; " + bytes_per_row
                + " expected, " + byte_offset + " read.");
    }

    return dataRow;
}

From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.dta.DTAFileReader.java

/**
 * Reads the variable type list of the descriptor (one type byte per
 * variable) and, for each variable, records its type label in
 * {@code variableTypes}, sets the type/interval on the matching data
 * variable, and accumulates the row width in {@code bytes_per_row}.
 * String lengths are stored in {@code StringLengthTable}.
 *
 * @param stream the input stream positioned at the start of the type list
 * @param nvar the number of variables, i.e. the number of type bytes to read
 * @throws IOException if fewer than {@code nvar} bytes could be read, or if
 *         a type byte cannot be mapped to a known type
 */
private void decodeDescriptorVarTypeList(BufferedInputStream stream, int nvar) throws IOException {
    byte[] typeList = new byte[nvar];

    // read() may deliver fewer bytes than requested and returns -1 at end of
    // stream, so keep reading until the buffer is full or the stream ends.
    // (The offset argument of read() is the position in the destination
    // array at which the bytes are stored.)
    int nbytes = 0;
    while (nbytes < nvar) {
        int count = stream.read(typeList, nbytes, nvar - nbytes);
        if (count < 0) {
            break;
        }
        nbytes += count;
    }
    if (nbytes == 0) {
        throw new IOException("reading the descriptior: no byte was read");
    }
    if (nbytes < nvar) {
        // a truncated list would otherwise be processed as zero-valued types
        throw new IOException("reading the descriptor: " + nvar + " variable type bytes expected, only "
                + nbytes + " read");
    }
    /*
     Stata internal constants representing variable type information; 
     these were kindly provided by Akio:
    111 type
    Type:   b   i   l   f   d (byte, int, long, float, double)
    byte:  -5  -4  -3  -2  -1 (signed byte = java's byte type)
    byte: 251 252 253 254 255 (unsigned byte)
    HEX:  FB  FC  FD  FE  FF
            
    105 type(type chars correspond to their hex/decimal expressions
    Type:   b   i   l   f   d (byte, int, long, float, double)
    byte:  98 105 108 102 100 (signed byte = java's byte type)
    byte:  98 105 108 102 100 (unsigned byte)
    HEX:  62  69  6C  66  64
     */
    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("type_offset_table:\n" + typeOffsetTable);

    bytes_per_row = 0;

    for (int i = 0; i < typeList.length; i++) {
        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine(i + "-th value=" + typeList[i]);

        /*
         * How Stata types correspond to the DVN types: 
         * "Byte", "Integer" and "Long" become Numeric, Discrete (unless date value); 
         * "Float" and "Double" become Numeric, Continuous (unless date value);
         * "String" becomes String;
         * Date/time values stored as numeric types above, are converted into 
         * Strings.
         * -- L.A. 4.0
         */

        if (byteLengthTable.containsKey(typeList[i])) {
            // one of the known numeric type bytes
            bytes_per_row += byteLengthTable.get(typeList[i]);
            variableTypes[i] = variableTypeTable.get(typeList[i]);
            String typeLabel = variableTypes[i];

            if (typeLabel != null) {
                dataTable.getDataVariables().get(i).setTypeNumeric();
                if (typeLabel.equals("Byte") || typeLabel.equals("Integer") || typeLabel.equals("Long")) {
                    // these are treated as discrete:
                    dataTable.getDataVariables().get(i).setIntervalDiscrete();

                } else if (typeLabel.equals("Float") || typeLabel.equals("Double")) {
                    // these are treated as continuous:
                    dataTable.getDataVariables().get(i).setIntervalContinuous();

                } else {
                    throw new IOException("Unrecognized type label: " + typeLabel
                            + " for Stata type value byte " + typeList[i] + ".");
                }
            } else {
                throw new IOException(
                        "No entry in the known types table for Stata type value byte " + typeList[i] + ".");
            }
        } else {
            // pre-111 string type
            if (releaseNumber < 111) {
                int stringType = 256 + typeList[i];
                if (stringType >= typeOffsetTable.get("STRING")) {
                    int string_var_length = stringType - typeOffsetTable.get("STRING");
                    if (dbgLog.isLoggable(Level.FINE))
                        dbgLog.fine("string_var_length=" + string_var_length);
                    bytes_per_row += string_var_length;

                    variableTypes[i] = "String";
                    dataTable.getDataVariables().get(i).setTypeCharacter();
                    dataTable.getDataVariables().get(i).setIntervalDiscrete();
                    StringLengthTable.put(i, string_var_length);

                } else {
                    throw new IOException("unknown variable type was detected: reading errors?");
                }
            } else if (releaseNumber >= 111) {
                // post-111 string type
                if (dbgLog.isLoggable(Level.FINE))
                    dbgLog.fine("DTA reader: typeList[" + i + "]=" + typeList[i]);

                // if the size of strXXX type is less than 128,
                // the value of typeList[i] will be equal to that;
                // if however it is >= 128, typeList[i] = (size - 256)
                // i.e. it'll be a negative value:

                int stringType = ((typeList[i] > 0) && (typeList[i] <= 127)) ? typeList[i] : 256 + typeList[i];

                if (stringType >= typeOffsetTable.get("STRING")) {
                    int string_var_length = stringType - typeOffsetTable.get("STRING");
                    if (dbgLog.isLoggable(Level.FINE))
                        dbgLog.fine("DTA reader: string_var_length=" + string_var_length);
                    bytes_per_row += string_var_length;

                    variableTypes[i] = "String";
                    dataTable.getDataVariables().get(i).setTypeCharacter();
                    dataTable.getDataVariables().get(i).setIntervalDiscrete();
                    StringLengthTable.put(i, string_var_length);

                } else {
                    throw new IOException("unknown variable type was detected: reading errors?");
                }
            } else {
                throw new IOException("uknown release number ");
            }

        }
        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine(i + "=th\t sum=" + bytes_per_row);
    }
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("bytes_per_row(final)=" + bytes_per_row);
        dbgLog.fine("variableTypes:\n" + Arrays.deepToString(variableTypes));
        dbgLog.fine("StringLengthTable=" + StringLengthTable);
    }

}

From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.rdata.RDATAFileReader.java

/**
 * Get Variable Type List
 * 
 * Categorizes the columns of a data-set according to their R data-type and
 * returns, per column, one of the codes: (-1) String, (0) Integer,
 * (1) Double-precision.
 * 
 * The mapping applied below is:
 * * "integer", "logical" => 0
 * * "numeric", "double" => 1
 * * "Date", "POSIXct"/"POSIXlt"/"POSIXt", "factor" and everything else => -1
 * 
 * As a side effect, the per-column format information is recorded in
 * mFormatTable, mPrintFormatList, mPrintFormatNameTable and
 * mFormatCategoryTable, and pushed into the metadata object (smd), together
 * with a second, "normal" type list (0 = numeric, 1 = string/date) and the
 * set of decimal (double) column indices.
 * 
 * @param dataTypes an array of strings where index corresponds to data-set
 * column and string corresponds to the class of the R-object.
 * @return the "minimal" type code (-1, 0 or 1) of each column, in column order
 */
private List<Integer> getVariableTypeList(String[] dataTypes) {
    /* 
     * TODO: 
     * 
     * Clean up this code; for example, the VariableMetaData variable "columnData"
     * is created below, but never saved or used. A vector of VariableMetaData 
     * values actually gets created somewhere else in the code of the reader, and those 
     * are the values that could be used elsewhere. Need to pick the one we want 
     * to use and remove the other one - for clarity. 
     * 
     * The whole setup with the "minimalTypeList" and "normalTypeList" is 
     * kinda confusing. One is used for the UNF and stats, the other one for 
     * metadata processing; which is ok. But then it is actually the "normal" 
     * one that is used for the "minimal" inside the SDIOMetadata object... 
     * Just renaming these to something that's more intuitive - types_for_UNF vs. 
     * types_for_METADATA - should be enough. 
     * 
     * --L.A.
     */

    mFormatTable = new int[mVarQuantity];

    List<Integer> minimalTypeList = new ArrayList<Integer>();
    List<Integer> normalTypeList = new ArrayList<Integer>();

    // indices of the columns holding double-precision values
    Set<Integer> decimalVariableSet = new HashSet<Integer>();

    // index of the column currently being classified
    int k = 0;

    for (String type : dataTypes) {
        // NOTE: assigned in every branch below, but never read afterwards
        // (see the TODO at the top of this method).
        VariableMetaData columnMetaData;

        String variableName = variableNameList.get(k);

        // Convention is that integer is zero, right?
        if (type.equals("integer")) {
            minimalTypeList.add(0);
            normalTypeList.add(0);
            mFormatTable[k] = FORMAT_INTEGER;
            mPrintFormatList.add(1);
            // mPrintFormatNameTable.put(variableName, "N");

            columnMetaData = new VariableMetaData(1);
        }

        // Double-precision data-types
        else if (type.equals("numeric") || type.equals("double")) {
            LOG.fine("RDATAfilereader: getVariableTypeList: double variable;");
            minimalTypeList.add(1);
            normalTypeList.add(0);
            decimalVariableSet.add(k);
            mFormatTable[k] = FORMAT_NUMERIC;
            mPrintFormatList.add(1);

            columnMetaData = new VariableMetaData(1);
        }

        // If date
        else if (type.equals("Date")) {
            minimalTypeList.add(-1);
            normalTypeList.add(1);

            mFormatTable[k] = FORMAT_DATE;

            mPrintFormatList.add(0);
            mPrintFormatNameTable.put(variableName, "DATE10");
            mFormatCategoryTable.put(variableName, "date");

            columnMetaData = new VariableMetaData(0);

            LOG.fine("date variable detected. format: " + FORMAT_DATE);
        }

        // Date-time classes
        else if (type.equals("POSIXct") || type.equals("POSIXlt") || type.equals("POSIXt")) {
            minimalTypeList.add(-1);
            normalTypeList.add(1);

            mFormatTable[k] = FORMAT_DATETIME;

            mPrintFormatList.add(0);
            mPrintFormatNameTable.put(variableName, "DATETIME23.3");
            mFormatCategoryTable.put(variableName, "time");

            columnMetaData = new VariableMetaData(0);

            LOG.fine("POSIXt variable detected. format: " + FORMAT_DATETIME);
        }

        else if (type.equals("factor")) {
            /* 
             * This is the counter-intuitive part: in R, factors always have 
             * internal integer values and character labels. However, we will 
             * always treat them as character/string variables, i.e. on the DVN
             * side they will be ingested as string-type categorical variables 
             * (with both the "value" and the "label" being the same string - the 
             * R factor label). Yes, this means we are dropping the numeric value
             * completely. Why not do what we do in SPSS, i.e. use the numeric for 
             * the value (and the TAB file entry)? - well, this is in fact a very 
             * different case: in SPSS, a researcher creating a categorical variable 
             * with numeric values would be hand-picking these numeric variables; 
             * so we assume that the chosen values are in fact meaningful. If they 
             * had some sort of a reason to assign 0 = "Male" and 7 = "Female", we 
             * assume that they wanted to do this. So we use the numeric codes for 
             * storage in the TAB file and for calculation of the UNF. In R however, 
             * the user has no control over the internal numeric codes; they are 
             * always created automatically and are in fact considered meaningless. 
             * So we are going to assume that it is the actual values of the labels 
             * that are meaningful. 
             *  -- L.A. 
             * 
             */
            minimalTypeList.add(-1);
            normalTypeList.add(1);
            mFormatTable[k] = FORMAT_STRING;
            mPrintFormatList.add(0);
            mPrintFormatNameTable.put(variableName, "other");
            mFormatCategoryTable.put(variableName, "other");

            columnMetaData = new VariableMetaData(0);
        } else if (type.equals("logical")) {
            // logicals are typed exactly like integers here
            minimalTypeList.add(0);
            normalTypeList.add(0);
            mFormatTable[k] = FORMAT_INTEGER;
            mPrintFormatList.add(1);
            // mPrintFormatNameTable.put(variableName, "N");

            columnMetaData = new VariableMetaData(1);
            columnMetaData.setBoolean(true);
            // Everything else is a string
        } else {
            minimalTypeList.add(-1);
            normalTypeList.add(1);
            mFormatTable[k] = FORMAT_STRING;
            mPrintFormatList.add(0);
            mPrintFormatNameTable.put(variableName, "other");
            mFormatCategoryTable.put(variableName, "other");

            columnMetaData = new VariableMetaData(0);
        }

        k++;
    }

    // Push the collected type/format information into the metadata object.
    // Note that it is the "normal" list that is stored as "minimal" here
    // (see the TODO at the top of this method).
    smd.setVariableTypeMinimal(
            ArrayUtils.toPrimitive(normalTypeList.toArray(new Integer[normalTypeList.size()])));
    smd.setDecimalVariables(decimalVariableSet);
    smd.setVariableStorageType(null);

    smd.setVariableFormat(mPrintFormatList);
    smd.setVariableFormatName(mPrintFormatNameTable);
    smd.setVariableFormatCategory(mFormatCategoryTable);

    LOG.fine("minimalTypeList =    " + Arrays.deepToString(minimalTypeList.toArray()));
    LOG.fine("normalTypeList =     " + Arrays.deepToString(normalTypeList.toArray()));
    LOG.fine("decimalVariableSet = " + Arrays.deepToString(decimalVariableSet.toArray()));

    LOG.fine("mPrintFormatList =      " + mPrintFormatList);
    LOG.fine("mPrintFormatNameTable = " + mPrintFormatNameTable);
    LOG.fine("mFormatCategoryTable =  " + mFormatCategoryTable);

    // Arrays.toString() prints the contents; concatenating the int[] itself
    // would only print its type and identity hash.
    LOG.fine("mFormatTable = " + Arrays.toString(mFormatTable));

    // Return the variable type list
    return minimalTypeList;
}

From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.rdata.RDATAFileReader.java

/**
 * Calculates the UNF signature of every column of the supplied data table,
 * plus the combined UNF of the whole data set.
 *
 * <p>For each column the values are converted according to the variable
 * type reported by {@code getVariableTypeList(mDataTypes)}: {@code 0} is
 * handled as integer (or boolean, when {@code smd.isBooleanVariable()[k]}
 * is set), {@code 1} as double and {@code -1} as string (with special
 * handling of date/time formatted columns). Values that cannot be
 * converted are replaced with {@code null}, which is how missing values
 * are passed to the UNF and statistics helpers. Summary statistics are
 * stored in {@code smd} along the way; integer and string columns
 * additionally get category statistics and null counts.
 *
 * @param table a rectangular data table; {@code table.getData()[k]} is
 *              read as the values of column {@code k}
 * @throws IOException if a non-missing date/time value cannot be parsed,
 *                     or if any UNF calculation fails (the original
 *                     exception is attached as the cause)
 */
private void createUNF(DataTable table) throws IOException {
    List<Integer> variableTypeList = getVariableTypeList(mDataTypes);
    String[] dateFormats = new String[mCaseQuantity];
    String[] unfValues = new String[mVarQuantity];
    String fileUNFvalue = null;

    for (int k = 0; k < mVarQuantity; k++) {
        String unfValue;
        String name = variableNameList.get(k);
        int varType = variableTypeList.get(k);

        Object[] varData = table.getData()[k];

        LOG.fine(String.format("RDATAFileReader: Column \"%s\" = %s", name, Arrays.deepToString(varData)));

        try {
            switch (varType) {
            case 0:
                Long[] integerEntries = new Long[varData.length];

                LOG.fine(k + ": " + name + " is numeric (integer)");

                if (smd.isBooleanVariable()[k]) {
                    // This is not a regular integer - but a boolean!
                    LOG.fine(k + ": " + name + " is NOT a simple integer - it's a logical (boolean)!");
                    Boolean[] booleanEntries = new Boolean[varData.length];
                    for (int i = 0; i < varData.length; i++) {
                        if (varData[i] == null || varData[i].equals("")) {
                            // Missing Value:
                            booleanEntries[i] = null;
                        } else if (((String) varData[i]).equals("0")) {
                            booleanEntries[i] = false;
                        } else if (((String) varData[i]).equals("1")) {
                            booleanEntries[i] = true;
                        } else {
                            // Treat it as a missing value?
                            booleanEntries[i] = null;
                            // TODO:
                            // Should we throw an exception here instead?
                        }

                        // We'll also need the integer values, to calculate
                        // the summary statistics:
                        try {
                            integerEntries[i] = Long.valueOf((String) varData[i]);
                        } catch (Exception ex) {
                            // Unparseable (or missing) value: record as missing.
                            integerEntries[i] = null;
                        }
                    }

                    unfValue = UNF5Util.calculateUNF(booleanEntries);
                    // TODO:
                    // we've never calculated UNFs for Booleans before -
                    // need to QA and verify that the values produced are correct.
                    // -- L.A.

                } else {
                    // Regular integer;
                    // Treat it as an array of Longs:
                    LOG.fine(k + ": " + name + " is a simple integer.");

                    for (int i = 0; i < varData.length; i++) {
                        try {
                            integerEntries[i] = Long.valueOf((String) varData[i]);
                        } catch (Exception ex) {
                            // Unparseable (or missing) value: record as missing.
                            integerEntries[i] = null;
                        }
                    }

                    unfValue = UNF5Util.calculateUNF(integerEntries);
                }

                // Summary/category statistics
                smd.getSummaryStatisticsTable().put(k,
                        ArrayUtils.toObject(StatHelper.calculateSummaryStatistics(integerEntries)));
                Map<String, Integer> catStat = StatHelper.calculateCategoryStatistics(integerEntries);
                smd.getCategoryStatisticsTable().put(variableNameList.get(k), catStat);
                smd.getNullValueCounts().put(variableNameList.get(k),
                        StatHelper.countNullValues(integerEntries));

                break;

            // If double
            case 1:
                LOG.fine(k + ": " + name + " is numeric (double)");
                // Convert array of Strings to array of Doubles
                Double[] doubleEntries = new Double[varData.length];

                for (int i = 0; i < varData.length; i++) {
                    try {
                        // Check for the special case of "NaN" - this is the R and DVN
                        // notation for the "Not A Number" value:
                        if (varData[i] != null && ((String) varData[i]).equals("NaN")) {
                            doubleEntries[i] = Double.NaN;
                            // "Inf" is another special case, notation for infinity,
                            // positive and negative:
                        } else if (varData[i] != null && (((String) varData[i]).equals("Inf")
                                || ((String) varData[i]).equals("+Inf"))) {

                            doubleEntries[i] = Double.POSITIVE_INFINITY;
                        } else if (varData[i] != null && ((String) varData[i]).equals("-Inf")) {

                            doubleEntries[i] = Double.NEGATIVE_INFINITY;
                        } else {
                            // Missing Values don't need to be treated separately; these
                            // are represented as empty strings in the TAB file; so
                            // attempting to create a Double object from one will
                            // throw an exception - which we are going to intercept
                            // below. For the UNF and Summary Stats purposes, missing
                            // values are represented as NULLs.
                            doubleEntries[i] = Double.valueOf((String) varData[i]);
                        }
                    } catch (Exception ex) {
                        LOG.fine(k + ": " + name + " dropping value " + (String) varData[i] + " (" + i
                                + "); replacing with null");
                        doubleEntries[i] = null;
                    }
                }

                LOG.fine("sumstat:double case=" + Arrays.deepToString(ArrayUtils
                        .toObject(StatHelper.calculateSummaryStatisticsContDistSample(doubleEntries))));

                // Save summary statistics:
                smd.getSummaryStatisticsTable().put(k, ArrayUtils
                        .toObject(StatHelper.calculateSummaryStatisticsContDistSample(doubleEntries)));

                unfValue = UNF5Util.calculateUNF(doubleEntries);

                break;

            case -1:
                LOG.fine(k + ": " + name + " is string");

                String[] stringEntries = new String[varData.length];

                // NOTE: the array has only been allocated at this point, so this
                // logs an array of nulls; it is populated by the loops below.
                LOG.fine("string array passed to calculateUNF: " + Arrays.deepToString(stringEntries));

                if (mFormatTable[k] == FORMAT_DATE || mFormatTable[k] == FORMAT_DATETIME) {
                    DateFormatter dateFormatter = new DateFormatter();

                    dateFormatter.setDateFormats(DATE_FORMATS);
                    dateFormatter.setTimeFormats(TIME_FORMATS);

                    for (int i = 0; i < varData.length; i++) {
                        Object rawValue = varData[i];

                        // If data is missing, treat this entry as just that -
                        // a missing value. Just like for all the other data types,
                        // this is represented by a null.
                        // The check looks at the value only: dateFormats is shared
                        // between columns, so its previous content says nothing
                        // about whether the current cell is missing.
                        if (rawValue == null || rawValue.equals("") || rawValue.equals(" ")) {
                            stringEntries[i] = null;
                            dateFormats[i] = null;
                        } else {
                            DateWithFormatter entryDateWithFormat = dateFormatter
                                    .getDateWithFormat((String) rawValue);
                            if (entryDateWithFormat == null) {
                                LOG.fine("ATTENTION: the supplied date/time string could not be parsed ("
                                        + (String) rawValue);
                                throw new IOException(
                                        "Could not parse supplied date/time string: " + (String) rawValue);
                            }
                            // Keep the original text and remember the pattern that matched it
                            stringEntries[i] = (String) rawValue;
                            dateFormats[i] = entryDateWithFormat.getFormatter().toPattern();
                        }
                    }

                    // Compute UNF
                    try {
                        LOG.fine("RDATAFileReader: strdata = " + Arrays.deepToString(stringEntries));
                        LOG.fine("RDATAFileReader: dateFormats = " + Arrays.deepToString(dateFormats));

                        unfValue = UNF5Util.calculateUNF(stringEntries, dateFormats);
                    } catch (Exception ex) {
                        LOG.warning("RDATAFileReader: UNF for variable " + name + " could not be computed!");
                        throw ex;
                    }
                } else {
                    for (int i = 0; i < varData.length; i++) {
                        if (varData[i] == null) {
                            // Missing Value
                            stringEntries[i] = null;
                        } else {
                            stringEntries[i] = (String) varData[i];
                        }
                    }

                    unfValue = UNF5Util.calculateUNF(stringEntries);
                }

                smd.getSummaryStatisticsTable().put(k, StatHelper.calculateSummaryStatistics(stringEntries));
                Map<String, Integer> StrCatStat = StatHelper.calculateCategoryStatistics(stringEntries);
                smd.getCategoryStatisticsTable().put(variableNameList.get(k), StrCatStat);
                smd.getNullValueCounts().put(variableNameList.get(k),
                        StatHelper.countNullValues(stringEntries));

                break;

            default:
                // Unknown variable type: no UNF is calculated for this column
                unfValue = null;

            }

            // Store UNF value
            unfValues[k] = unfValue;
        } catch (Exception ex) {
            LOG.fine("Exception caught while calculating UNF! " + ex.getMessage());
            // Chain the original exception so its stack trace travels with the IOException
            throw new IOException("Exception caught while calculating UNF! " + ex.getMessage(), ex);
        }
        LOG.fine(String.format("RDATAFileReader: Column \"%s\" (UNF) = %s", name, unfValues[k]));

    }

    try {
        fileUNFvalue = UNF5Util.calculateUNF(unfValues);
    } catch (Exception ex) {
        LOG.fine("Exception caught while calculating the combined UNF for the data set! " + ex.getMessage());
        throw new IOException(
                "Exception caught while calculating the combined UNF for the data set! " + ex.getMessage(),
                ex);
    }
    mCsvDataTable.setUnf(unfValues);
    mCsvDataTable.setFileUnf(fileUNFvalue);

    // Debug output only: dump the continuous-variable flags held by smd
    boolean[] b = smd.isContinuousVariable();

    for (int k = 0; k < b.length; k++) {
        String s = b[k] ? "True" : "False";
        LOG.fine(k + " = " + s);
    }

    smd.setVariableUNF(unfValues);
    smd.getFileInformation().put("fileUNF", fileUNFvalue);
}

From source file:org.broadinstitute.gatk.utils.commandline.ParsingEngine.java

/**
 * Renders one message per argument match, each starting on a new line, of
 * the form {@code Argument '<label>' has too many values: <values>.}
 *
 * @param arguments the argument matches to describe
 * @return the concatenated messages; an empty string when the collection is empty
 */
private static String formatArguments(Collection<ArgumentMatch> arguments) {
    final StringBuilder message = new StringBuilder();
    for (final ArgumentMatch match : arguments) {
        final String renderedValues = Arrays.deepToString(match.values().toArray());
        message.append(
                String.format("%nArgument '%s' has too many values: %s.", match.label, renderedValues));
    }
    return message.toString();
}

From source file:jeplus.JEPlusProject.java

/**
 * Builds a job list of {@code LHSsize} rows from the per-parameter samples
 * returned by {@code getSampleInEqualProbSegments}. Each sample vector
 * except the one at index 0 is shuffled independently; row {@code i} then
 * takes element {@code i} of every shuffled vector, with column 0 holding
 * the job id {@code LHS-nnnnnn}.
 *
 * @param LHSsize   number of jobs (rows) to create
 * @param randomsrc random source used for sampling and shuffling; when
 *                  {@code null}, {@code RandomSource.getRandomGenerator()} is used
 * @return the job list, or {@code null} when {@code ParamTree} is {@code null}
 */
public String[][] getLHSJobList(int LHSsize, Random randomsrc) {

    final Random rng = (randomsrc != null) ? randomsrc : RandomSource.getRandomGenerator();

    final String[][] jobs = new String[LHSsize][];

    // Without a parameter tree there is nothing to sample from
    if (ParamTree == null) {
        return null;
    }

    // One sample vector per parameter (inc. idf and weather)
    final String[][] samples = getSampleInEqualProbSegments(LHSsize, rng);
    logger.debug(Arrays.deepToString(samples));

    final int columns = samples.length;

    // Shuffle every sample vector in place; index 0 is left untouched
    for (int col = 1; col < columns; col++) {
        Collections.shuffle(Arrays.asList(samples[col]), rng);
    }

    // Job n takes the n-th value of each (shuffled) parameter vector
    for (int row = 0; row < LHSsize; row++) {
        final String[] job = new String[columns];
        job[0] = String.format("LHS-%06d", row); // Job id
        for (int col = 1; col < columns; col++) {
            job[col] = samples[col][row];
        }
        jobs[row] = job;
    }
    return jobs;
}

From source file:org.apache.sqoop.connector.idf.TestCSVIntermediateDataFormat.java

/**
 * Sets CSV text holding a nested array column and a text column, then
 * checks the object data read back from the data format: the first field
 * must be a two-element array whose string form matches the given nested
 * array, and the second field must be the text value.
 */
@Test
public void testArrayOfObjectsWithCSVTextInObjectArrayOut() {
    // Schema: column "1" is an array of arrays of fixed-point values, column "2" is text
    Schema schema = new Schema("test");
    org.apache.sqoop.schema.type.Array innerArrayColumn = new org.apache.sqoop.schema.type.Array("array",
            new FixedPoint("ft", 2L, false));
    schema.addColumn(new org.apache.sqoop.schema.type.Array("1", innerArrayColumn));
    schema.addColumn(new org.apache.sqoop.schema.type.Text("2"));
    dataFormat = new CSVIntermediateDataFormat(schema);

    Object[] firstInner = { 11, 12 };
    Object[] secondInner = { 14, 15 };
    Object[] arrayOfArrays = { firstInner, secondInner };

    // Object-array form of the row (not passed to the data format in this test)
    Object[] data = { arrayOfArrays, "text" };

    dataFormat.setCSVTextData("'[\"[11, 12]\",\"[14, 15]\"]','text'");

    Object[] actualArray = (Object[]) dataFormat.getObjectData()[0];
    assertEquals(2, actualArray.length);
    assertEquals(Arrays.deepToString(arrayOfArrays), Arrays.toString(actualArray));
    assertEquals("text", dataFormat.getObjectData()[1]);
}

From source file:com.yunguchang.data.ApplicationRepository.java

/**
 * Configures the given subquery to select scheduled cars whose schedule
 * overlaps the combined time window of the supplied applications.
 *
 * <p>The window runs from the earliest begin time to the latest end time
 * of all applications. A schedule counts as overlapping when its start or
 * its end lies inside the window, or when it encloses the window entirely.
 * Only rows whose status is {@code ScheduleStatus.AWAITING} are matched,
 * and the row's car must equal {@code carRoot}; when {@code driverRoot} is
 * not {@code null} the row's driver must equal it as well.
 *
 * @param overlapScheduleCarSubQuery the subquery to populate
 * @param applicationIds             ids of the applications defining the window
 * @param carRoot                    car root the subquery is correlated with
 * @param driverRoot                 driver root to correlate with, or {@code null}
 * @param cb                         criteria builder used to build predicates
 * @param principalExt               principal passed to {@code getApplicationById}
 * @return the same subquery instance, with selection and restriction applied
 */
private Subquery<TBusScheduleCarEntity> applyOverlapScheduleCarSubquery(
        Subquery<TBusScheduleCarEntity> overlapScheduleCarSubQuery, String[] applicationIds,
        Root<TAzCarinfoEntity> carRoot, Root<TRsDriverinfoEntity> driverRoot, CriteriaBuilder cb,
        PrincipalExt principalExt) {
    Root<TBusScheduleCarEntity> subScheduleCarRoot = overlapScheduleCarSubQuery
            .from(TBusScheduleCarEntity.class);
    overlapScheduleCarSubQuery.select(subScheduleCarRoot);

    Path<DateTime> scheduleStartTime = subScheduleCarRoot.get(TBusScheduleCarEntity_.schedule)
            .get(TBusScheduleRelaEntity_.starttime);
    Path<DateTime> scheduleEndTime = subScheduleCarRoot.get(TBusScheduleCarEntity_.schedule)
            .get(TBusScheduleRelaEntity_.endtime);

    // Widen the window to cover every application: earliest begin, latest end.
    DateTime applicationStartTime = null;
    DateTime applicationEndTime = null;
    for (String applicationId : applicationIds) {
        TBusApplyinfoEntity applicationEntity = getApplicationById(applicationId, principalExt);
        if (applicationEntity == null) {
            throw logger.entityNotFound(TBusApplyinfoEntity.class, applicationId);
        }
        // The earliest start must be compared against the current start, not the
        // current end; comparing against the end picked the wrong start time and
        // could dereference a null end time.
        if (applicationStartTime == null || applicationStartTime.isAfter(applicationEntity.getBegintime())) {
            applicationStartTime = applicationEntity.getBegintime();
        }
        if (applicationEndTime == null || applicationEndTime.isBefore(applicationEntity.getEndtime())) {
            applicationEndTime = applicationEntity.getEndtime();
        }

    }

    // No ids supplied, or the applications carry no usable begin/end time
    if (applicationStartTime == null || applicationEndTime == null) {
        throw logger.invalidApplication(Arrays.deepToString(applicationIds));
    }

    // Overlap: schedule starts inside the window, ends inside the window,
    // or starts before and ends after it.
    Predicate predicate = cb.and(
            cb.or(cb.and(cb.between(scheduleStartTime, applicationStartTime, applicationEndTime)),
                    cb.and(cb.between(scheduleEndTime, applicationStartTime, applicationEndTime)),
                    cb.and(cb.lessThan(scheduleStartTime, applicationStartTime),
                            cb.greaterThan(scheduleEndTime, applicationEndTime))),
            subScheduleCarRoot.get(TBusScheduleCarEntity_.status).in(ScheduleStatus.AWAITING.id()));
    if (driverRoot != null) {
        predicate = cb.and(predicate,
                cb.and(cb.equal(subScheduleCarRoot.get(TBusScheduleCarEntity_.car), carRoot),
                        cb.equal(subScheduleCarRoot.get(TBusScheduleCarEntity_.driver), driverRoot)));
    } else {
        predicate = cb.and(predicate, cb.equal(subScheduleCarRoot.get(TBusScheduleCarEntity_.car), carRoot));
    }

    overlapScheduleCarSubQuery.where(predicate);
    return overlapScheduleCarSubQuery;
}

From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.spss.SPSSFileReader.java

/**
 * Calculates the UNF of one variable (column) and records its statistics
 * in {@code smd}.
 *
 * <p>The values are converted according to {@code variableType}:
 * {@code 0} to {@code Long}, {@code 1} to {@code Double}, {@code -1} to
 * {@code String}. Numeric values that cannot be parsed become {@code null}.
 * Summary statistics are stored for every type; category statistics are
 * stored for the integer and string types.
 *
 * @param varData          the values of the variable, expected to be strings
 * @param dateFormats      per-value date/time formats for a string variable,
 *                         or {@code null} when the variable is not a date/time;
 *                         entries belonging to blank or missing values are
 *                         reset to {@code null} in place
 * @param variableType     {@code 0} (integer), {@code 1} (double) or {@code -1} (string)
 * @param unfVersionNumber only logged by this method
 * @param variablePosition index of the variable, used as key into the
 *                         statistics tables and {@code variableNameList}
 * @return the UNF value calculated for the variable
 * @throws IllegalArgumentException if {@code variableType} is not one of the
 *                                  supported values
 */
private String getUNF(Object[] varData, String[] dateFormats, int variableType, String unfVersionNumber,
        int variablePosition)
        throws NumberFormatException, UnfException, IOException, NoSuchAlgorithmException {
    String unfValue = null;

    dbgLog.fine("variableType=" + variableType);
    dbgLog.finer("unfVersionNumber=" + unfVersionNumber);
    dbgLog.fine("variablePosition=" + variablePosition);
    dbgLog.fine("variableName=" + variableNameList.get(variablePosition));

    switch (variableType) {
    case 0:
        // Integer (Long):

        dbgLog.fine("Integer case");

        // Convert array of Strings to array of Longs
        Long[] ldata = new Long[varData.length];
        for (int i = 0; i < varData.length; i++) {
            try {
                ldata[i] = Long.valueOf((String) varData[i]);
            } catch (Exception ex) {
                // Missing or unparseable value: represented as null
                ldata[i] = null;
            }
        }
        unfValue = UNF5Util.calculateUNF(ldata);
        dbgLog.finer("integer:unfValue=" + unfValue);

        smd.getSummaryStatisticsTable().put(variablePosition,
                ArrayUtils.toObject(StatHelper.calculateSummaryStatistics(ldata)));

        Map<String, Integer> catStat = StatHelper.calculateCategoryStatistics(ldata);
        smd.getCategoryStatisticsTable().put(variableNameList.get(variablePosition), catStat);

        break;

    case 1:
        // Double:

        dbgLog.finer("double case");

        // Convert array of Strings to array of Doubles
        Double[] ddata = new Double[varData.length];
        for (int i = 0; i < varData.length; i++) {
            try {
                ddata[i] = Double.valueOf((String) varData[i]);
            } catch (Exception ex) {
                // Missing or unparseable value: represented as null
                ddata[i] = null;
            }
        }
        unfValue = UNF5Util.calculateUNF(ddata);
        dbgLog.finer("double:unfValue=" + unfValue);
        smd.getSummaryStatisticsTable().put(variablePosition,
                ArrayUtils.toObject(StatHelper.calculateSummaryStatisticsContDistSample(ddata)));

        break;
    case -1:
        // String:
        //
        // i.e., this is something *stored* as string; it may still be
        // a more complex data type than just a string of characters.
        // Namely, it can be some date or time type that we support.
        // These should be handled differently when calculating the
        // UNFs.

        dbgLog.finer("string case");

        String[] strdata = Arrays.asList(varData).toArray(new String[varData.length]);
        dbgLog.finer("string array passed to calculateUNF: " + Arrays.deepToString(strdata));

        if (dateFormats != null) {

            for (int i = 0; i < varData.length; i++) {
                if (dateFormats[i] == null) {
                    continue;
                }
                // A missing (null) or blank value carries no date: clear both the
                // value and its format. The null check prevents a
                // NullPointerException on missing entries.
                String value = strdata[i];
                if (value == null || value.equals("") || value.equals(" ")) {
                    strdata[i] = null;
                    dateFormats[i] = null;
                }
            }

            unfValue = UNF5Util.calculateUNF(strdata, dateFormats);
        } else {
            unfValue = UNF5Util.calculateUNF(strdata);
        }

        dbgLog.finer("string:unfValue=" + unfValue);

        smd.getSummaryStatisticsTable().put(variablePosition, StatHelper.calculateSummaryStatistics(strdata));

        Map<String, Integer> StrCatStat = StatHelper.calculateCategoryStatistics(strdata);

        smd.getCategoryStatisticsTable().put(variableNameList.get(variablePosition), StrCatStat);

        break;
    default:
        dbgLog.fine("unknown variable type found");
        String errorMessage = "unknow variable Type found at varData section";
        throw new IllegalArgumentException(errorMessage);

    } // switch

    dbgLog.fine("unfvalue(last)=" + unfValue);
    return unfValue;
}

From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.ddi.DDIFileReader.java

/**
 * Calculates the UNF of one variable (column) and records its statistics
 * in {@code smd}.
 *
 * <p>The values are converted according to {@code variableType}:
 * {@code 0} to {@code Long}, {@code 1} to {@code Double}, {@code -1} to
 * {@code String}. Numeric values that cannot be parsed become {@code null}.
 * Summary statistics are stored for every type; category statistics are
 * stored for the integer and string types.
 *
 * @param varData          the values of the variable, expected to be strings
 * @param dateFormats      per-value date/time formats for a string variable,
 *                         or {@code null} when the variable is not a date/time;
 *                         entries belonging to blank or missing values are
 *                         reset to {@code null} in place
 * @param variableType     {@code 0} (integer), {@code 1} (double) or {@code -1} (string)
 * @param unfVersionNumber only logged by this method
 * @param variablePosition index of the variable, used as key into the
 *                         statistics tables and {@code variableNameList}
 * @return the UNF value calculated for the variable
 * @throws IllegalArgumentException if {@code variableType} is not one of the
 *                                  supported values
 */
private String getUNF(Object[] varData, String[] dateFormats, int variableType, String unfVersionNumber,
        int variablePosition)
        throws NumberFormatException, UnfException, IOException, NoSuchAlgorithmException {
    String unfValue = null;

    dbgLog.fine("variableType=" + variableType);
    dbgLog.finer("unfVersionNumber=" + unfVersionNumber);
    dbgLog.fine("variablePosition=" + variablePosition);

    switch (variableType) {
    case 0:
        // Integer (Long):

        dbgLog.fine("Integer case");

        // Convert array of Strings to array of Longs
        Long[] ldata = new Long[varData.length];
        for (int i = 0; i < varData.length; i++) {
            try {
                ldata[i] = Long.valueOf((String) varData[i]);
            } catch (Exception ex) {
                // Missing or unparseable value: represented as null
                ldata[i] = null;
            }
        }
        unfValue = UNF5Util.calculateUNF(ldata);
        dbgLog.finer("integer:unfValue=" + unfValue);

        smd.getSummaryStatisticsTable().put(variablePosition,
                ArrayUtils.toObject(StatHelper.calculateSummaryStatistics(ldata)));

        Map<String, Integer> catStat = StatHelper.calculateCategoryStatistics(ldata);
        smd.getCategoryStatisticsTable().put(variableNameList.get(variablePosition), catStat);

        break;

    case 1:
        // Double:

        dbgLog.finer("double case");

        // Convert array of Strings to array of Doubles
        Double[] ddata = new Double[varData.length];
        for (int i = 0; i < varData.length; i++) {
            try {
                ddata[i] = Double.valueOf((String) varData[i]);
            } catch (Exception ex) {
                // Missing or unparseable value: represented as null
                ddata[i] = null;
            }
        }
        unfValue = UNF5Util.calculateUNF(ddata);
        dbgLog.finer("double:unfValue=" + unfValue);
        smd.getSummaryStatisticsTable().put(variablePosition,
                ArrayUtils.toObject(StatHelper.calculateSummaryStatisticsContDistSample(ddata)));

        break;
    case -1:
        // String:
        //
        // i.e., this is something *stored* as string; it may still be
        // a more complex data type than just a string of characters.
        // Namely, it can be some date or time type that we support.
        // These should be handled differently when calculating the
        // UNFs.

        dbgLog.finer("string case");

        String[] strdata = Arrays.asList(varData).toArray(new String[varData.length]);
        dbgLog.finer("string array passed to calculateUNF: " + Arrays.deepToString(strdata));

        if (dateFormats != null) {

            for (int i = 0; i < varData.length; i++) {
                if (dateFormats[i] == null) {
                    continue;
                }
                // A missing (null) or blank value carries no date: clear both the
                // value and its format. The null check prevents a
                // NullPointerException on missing entries.
                String value = strdata[i];
                if (value == null || value.equals("") || value.equals(" ")) {
                    strdata[i] = null;
                    dateFormats[i] = null;
                }
            }

            unfValue = UNF5Util.calculateUNF(strdata, dateFormats);
        } else {
            unfValue = UNF5Util.calculateUNF(strdata);
        }

        dbgLog.finer("string:unfValue=" + unfValue);

        smd.getSummaryStatisticsTable().put(variablePosition, StatHelper.calculateSummaryStatistics(strdata));

        Map<String, Integer> StrCatStat = StatHelper.calculateCategoryStatistics(strdata);

        smd.getCategoryStatisticsTable().put(variableNameList.get(variablePosition), StrCatStat);

        break;
    default:
        dbgLog.fine("unknown variable type found");
        String errorMessage = "unknow variable Type found at varData section";
        throw new IllegalArgumentException(errorMessage);

    } // switch

    dbgLog.fine("unfvalue(last)=" + unfValue);
    return unfValue;
}