List of usage examples for java.util.Arrays.deepToString
public static String deepToString(Object[] a)
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.dta.DTA117FileReader.java
private void readData(DataReader reader) throws IOException { logger.fine("Data section; at offset " + reader.getByteOffset() + "; dta map offset: " + dtaMap.getOffset_data());/* w w w . j a va 2 s . c o m*/ logger.fine("readData(): start"); reader.readOpeningTag(TAG_DATA); // TODO: // check that we are at the right byte offset! int nvar = dataTable.getVarQuantity().intValue(); int nobs = dataTable.getCaseQuantity().intValue(); int[] variableByteLengths = getVariableByteLengths(variableTypes); int bytes_per_row = calculateBytesPerRow(variableByteLengths); logger.fine("data dimensions[observations x variables] = (" + nobs + "x" + nvar + ")"); logger.fine("bytes per row=" + bytes_per_row + " bytes"); logger.fine("variableTypes=" + Arrays.deepToString(variableTypes)); // create a File object to save the tab-delimited data file FileOutputStream fileOutTab = null; PrintWriter pwout = null; File tabDelimitedDataFile = File.createTempFile("tempTabfile.", ".tab"); // save the temp tab-delimited file in the return ingest object: ingesteddata.setTabDelimitedFile(tabDelimitedDataFile); fileOutTab = new FileOutputStream(tabDelimitedDataFile); pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true); logger.fine("Beginning to read data stream."); for (int i = 0; i < nobs; i++) { //byte[] dataRowBytes = new byte[bytes_per_row]; Object[] dataRow = new Object[nvar]; //int nbytes = stream.read(dataRowBytes, 0, bytes_per_row); //dataRowBytes = reader.readBytes(bytes_per_row); // TODO: // maybe intercept any potential exceptions here, and add more // diagnostic info, before re-throwing... int byte_offset = 0; for (int columnCounter = 0; columnCounter < nvar; columnCounter++) { String varType = variableTypes[columnCounter]; // 4.0 Check if this is a time/date variable: boolean isDateTimeDatum = false; // TODO: // make sure the formats are properly set! -- use the old // plugin as a model... 
String formatCategory = dataTable.getDataVariables().get(columnCounter).getFormatCategory(); if (formatCategory != null && (formatCategory.equals("time") || formatCategory.equals("date"))) { isDateTimeDatum = true; } // TODO: // ditto String variableFormat = dateVariableFormats[columnCounter]; if (varType == null || varType.equals("")) { throw new IOException("Undefined variable type encountered in readData()"); } // TODO: // double-check that the missing values constants are still correct! if (varType.equals("Byte")) { // (signed) Byte byte byte_datum = reader.readSignedByte(); logger.fine(i + "-th row " + columnCounter + "=th column byte =" + byte_datum); if (byte_datum >= BYTE_MISSING_VALUE) { logger.fine(i + "-th row " + columnCounter + "=th column byte MV=" + byte_datum); dataRow[columnCounter] = MissingValueForTabDelimitedFile; } else { dataRow[columnCounter] = byte_datum; logger.fine(i + "-th row " + columnCounter + "-th column byte value=" + byte_datum); } byte_offset++; } else if (varType.equals("Integer")) { short short_datum = (short) reader.readShortSignedInteger(); logger.fine(i + "-th row " + columnCounter + "=th column stata int =" + short_datum); if (short_datum >= INT_MISSIG_VALUE) { logger.fine(i + "-th row " + columnCounter + "=th column stata long missing value=" + short_datum); dataRow[columnCounter] = MissingValueForTabDelimitedFile; } else { if (isDateTimeDatum) { DecodedDateTime ddt = decodeDateTimeData("short", variableFormat, Short.toString(short_datum)); logger.fine(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format); dataRow[columnCounter] = ddt.decodedDateTime; //dateFormat[columnCounter][i] = ddt.format; dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format); } else { dataRow[columnCounter] = short_datum; logger.fine( i + "-th row " + columnCounter + "-th column \"integer\" value=" + short_datum); } } byte_offset += 2; } else if (varType.equals("Long")) { // stata-Long (= java's int: 4 
byte), signed. int int_datum = reader.readSignedInteger(); if (int_datum >= LONG_MISSING_VALUE) { dataRow[columnCounter] = MissingValueForTabDelimitedFile; } else { if (isDateTimeDatum) { DecodedDateTime ddt = decodeDateTimeData("int", variableFormat, Integer.toString(int_datum)); logger.fine(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format); dataRow[columnCounter] = ddt.decodedDateTime; dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format); } else { dataRow[columnCounter] = int_datum; logger.fine(i + "-th row " + columnCounter + "-th column \"long\" value=" + int_datum); } } byte_offset += 4; } else if (varType.equals("Float")) { // STATA float // same as Java float - 4-byte float float_datum = reader.readFloat(); logger.fine(i + "-th row " + columnCounter + "=th column float =" + float_datum); if (FLOAT_MISSING_VALUE_SET.contains(float_datum)) { logger.fine( i + "-th row " + columnCounter + "=th column float missing value=" + float_datum); dataRow[columnCounter] = MissingValueForTabDelimitedFile; } else { if (isDateTimeDatum) { DecodedDateTime ddt = decodeDateTimeData("float", variableFormat, doubleNumberFormatter.format(float_datum)); logger.fine(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format); dataRow[columnCounter] = ddt.decodedDateTime; dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format); } else { dataRow[columnCounter] = float_datum; logger.fine(i + "-th row " + columnCounter + "=th column float value:" + float_datum); // This may be temporary - but for now (as in, while I'm testing // 4.0 ingest against 3.* ingest, I need to be able to tell if a // floating point value was a single, or double float in the // original STATA file: -- L.A. Jul. 2014 dataTable.getDataVariables().get(columnCounter).setFormat("float"); // ? 
} } byte_offset += 4; } else if (varType.equals("Double")) { // STATA double // same as Java double - 8-byte double double_datum = reader.readDouble(); if (DOUBLE_MISSING_VALUE_SET.contains(double_datum)) { logger.finer( i + "-th row " + columnCounter + "=th column double missing value=" + double_datum); dataRow[columnCounter] = MissingValueForTabDelimitedFile; } else { if (isDateTimeDatum) { DecodedDateTime ddt = decodeDateTimeData("double", variableFormat, doubleNumberFormatter.format(double_datum)); logger.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format); dataRow[columnCounter] = ddt.decodedDateTime; dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format); } else { logger.fine(i + "-th row " + columnCounter + "=th column double value:" + double_datum); //doubleNumberFormatter.format(double_datum)); dataRow[columnCounter] = double_datum; //doubleNumberFormatter.format(double_datum); } } byte_offset += 8; } else if (varType.matches("^STR[1-9][0-9]*")) { // String case int strVarLength = variableByteLengths[columnCounter]; logger.fine( i + "-th row " + columnCounter + "=th column is a string (" + strVarLength + " bytes)"); //String raw_datum = new String(Arrays.copyOfRange(dataRowBytes, byte_offset, // (byte_offset + strVarLength)), "ISO-8859-1"); // (old) TODO: // is it the right thing to do, to default to "ISO-8859-1"? // (it may be; since there's no mechanism for specifying // alternative encodings in Stata, this may be their default; // it just needs to be verified. -- L.A. Jul. 2014) // ACTUALLY, in STATA13, it appears that STRF *MUST* // be limited to ASCII. Binary strings can be stored as // STRLs. (Oct. 
6 2014) //String string_datum = getNullStrippedString(raw_datum); String string_datum = reader.readString(strVarLength); if (string_datum.length() < 64) { logger.fine(i + "-th row " + columnCounter + "=th column string =" + string_datum); } else { logger.fine(i + "-th row " + columnCounter + "=th column string =" + string_datum.substring(0, 64) + "... (truncated)"); } if (string_datum.equals("")) { logger.fine( i + "-th row " + columnCounter + "=th column string missing value=" + string_datum); // TODO: /* Is this really a missing value case? * Or is it an honest empty string? * Is there such a thing as a missing value for a String in Stata? * -- L.A. 4.0 */ dataRow[columnCounter] = MissingValueForTabDelimitedFile; } else { /* * Some special characters, like new lines and tabs need to * be escaped - otherwise they will break our TAB file * structure! */ dataRow[columnCounter] = escapeCharacterString(string_datum); } byte_offset += strVarLength; } else if (varType.equals("STRL")) { //throw new IOException("<Support for STRLs not yet implemented>"); logger.fine("STRL encountered."); if (cachedGSOs == null) { cachedGSOs = new LinkedHashMap<>(); } // Reading the (v,o) pair: long v = 0; long o = 0; String voPair = null; // first v: v = reader.readInteger(); byte_offset += 4; // then o: o = reader.readInteger(); byte_offset += 4; // create v,o pair; save, for now: voPair = v + "," + o; dataRow[columnCounter] = voPair; // TODO: // Validate v and o? // Making sure v <= varNum and o < numbObs; // or, if o == numObs, v <= columnCounter; // -- per the Stata 13 spec... if (!(v == columnCounter + 1 && o == i + 1)) { if (!cachedGSOs.containsKey(voPair)) { cachedGSOs.put(voPair, ""); // this means we need to cache this GSO, when // we read the STRLS section later on. 
} } } else { logger.warning("unknown variable type found: " + varType); String errorMessage = "unknown variable type encounted when reading data section: " + varType; //throw new InvalidObjectException(errorMessage); throw new IOException(errorMessage); } } // for (columnCounter) if (byte_offset != bytes_per_row) { throw new IOException("Unexpected number of bytes read for data row " + i + "; " + bytes_per_row + " expected, " + byte_offset + " read."); } // Dump the row of data to the tab-delimited file: pwout.println(StringUtils.join(dataRow, "\t")); logger.fine("finished reading " + i + "-th row"); } // for (rows) pwout.close(); reader.readClosingTag(TAG_DATA); logger.fine("DTA117 Ingest: readData(): end."); }
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.dta.DTAFileReader.java
private void decodeDescriptorVarTypeList(BufferedInputStream stream, int nvar) throws IOException { byte[] typeList = new byte[nvar]; // note: the offset param of read() is relative to // the current position, not absolute position int nbytes = stream.read(typeList, 0, nvar); //printHexDump(typeList, "variable type list"); if (nbytes == 0) { throw new IOException("reading the descriptior: no byte was read"); }/* w w w. ja v a 2 s.co m*/ /* Stata internal constants representing variable type information; these were kindly provided by Akio: 111 type Type: b i l f d (byte, int, long, float, double) byte: -5 -4 -3 -2 -1 (signed byte = java's byte type) byte: 251 252 253 254 255 (unsigned byte) HEX: FB FC FD FE FF 105 type(type chars correspond to their hex/decimal expressions Type: b i l f d (byte, int, long, float, double) byte: 98 105 108 102 100 (signed byte = java's byte type) byte: 98 105 108 102 100 (unsigned byte) HEX: 62 69 6C 66 64 */ if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("type_offset_table:\n" + typeOffsetTable); bytes_per_row = 0; for (int i = 0; i < typeList.length; i++) { if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine(i + "-th value=" + typeList[i]); /* * How Stata types correspond to the DVN types: * "Byte", "Integer" and "Long" become Numeric, Discrete (unless date value); * "Float" and "Double" become Numeric, Continuous (unless date value); * "String" becomes String; * Date/time values stored as numeric types above, are converted into * Strings. * -- L.A. 
4.0 */ if (byteLengthTable.containsKey(typeList[i])) { bytes_per_row += byteLengthTable.get(typeList[i]); variableTypes[i] = variableTypeTable.get(typeList[i]); String typeLabel = variableTypes[i]; if (typeLabel != null) { dataTable.getDataVariables().get(i).setTypeNumeric(); if (typeLabel.equals("Byte") || typeLabel.equals("Integer") || typeLabel.equals("Long")) { // these are treated as discrete: dataTable.getDataVariables().get(i).setIntervalDiscrete(); } else if (typeLabel.equals("Float") || typeLabel.equals("Double")) { // these are treated as contiuous: dataTable.getDataVariables().get(i).setIntervalContinuous(); } else { throw new IOException("Unrecognized type label: " + typeLabel + " for Stata type value byte " + typeList[i] + "."); } } else { throw new IOException( "No entry in the known types table for Stata type value byte " + typeList[i] + "."); } } else { // pre-111 string type if (releaseNumber < 111) { int stringType = 256 + typeList[i]; if (stringType >= typeOffsetTable.get("STRING")) { int string_var_length = stringType - typeOffsetTable.get("STRING"); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("string_var_length=" + string_var_length); bytes_per_row += string_var_length; variableTypes[i] = "String"; dataTable.getDataVariables().get(i).setTypeCharacter(); dataTable.getDataVariables().get(i).setIntervalDiscrete(); StringLengthTable.put(i, string_var_length); } else { throw new IOException("unknown variable type was detected: reading errors?"); } } else if (releaseNumber >= 111) { // post-111 string type if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("DTA reader: typeList[" + i + "]=" + typeList[i]); // if the size of strXXX type is less than 128, // the value of typeList[i] will be equal to that; // if however it is >= 128, typeList[i] = (size - 256) // i.e. it'll be a negative value: int stringType = ((typeList[i] > 0) && (typeList[i] <= 127)) ? 
typeList[i] : 256 + typeList[i]; if (stringType >= typeOffsetTable.get("STRING")) { int string_var_length = stringType - typeOffsetTable.get("STRING"); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("DTA reader: string_var_length=" + string_var_length); bytes_per_row += string_var_length; variableTypes[i] = "String"; dataTable.getDataVariables().get(i).setTypeCharacter(); dataTable.getDataVariables().get(i).setIntervalDiscrete(); StringLengthTable.put(i, string_var_length); } else { throw new IOException("unknown variable type was detected: reading errors?"); } } else { throw new IOException("uknown release number "); } } if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine(i + "=th\t sum=" + bytes_per_row); } if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("bytes_per_row(final)=" + bytes_per_row); dbgLog.fine("variableTypes:\n" + Arrays.deepToString(variableTypes)); dbgLog.fine("StringLengthTable=" + StringLengthTable); } }
From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.rdata.RDATAFileReader.java
/** * Get Variable Type List//from w w w . j ava2s. c o m * * Categorize the columns of a data-set according to data-type. Returns a list * of integers corresponding to: (-1) String (0) Integer (1) Double-precision. * The numbers do not directly correspond with anything used by UNF5Util, * however this convention is seen throughout the DVN data-file readers. * * This function essentially matches R data-types with those understood by * DVN: * * integer => "Integer" * * numeric (non-integer), double => "Double" * * Date => "Date" * * Other => "String" * * @param dataTypes an array of strings where index corresponds to data-set * column and string corresponds to the class of the R-object. * @return */ private List<Integer> getVariableTypeList(String[] dataTypes) { /* * TODO: * * Clean up this code; for example, the VariableMetaData variable "columnData" * is created below, but never saved or used. A vector of VariableMetaData * values actually gets created somewhere else in the code of the reader, and those * are the values that could be used elsewhere. Need to pick the one we want * to use and remove the other one - for clarity. * * The whole setup with the "minimalTypeList" and "normalTypeList" is * kinda confusing. One is used for the UNF and stats, the other one for * metadata processing; which is ok. But then it is actually the "normal" * one that is used for the "minimal" inside the SDIOMetadata object... * Just renaming these to something that's more intuitive - types_for_UNF vs. * types_for_METADATA - should be enough. * * --L.A. */ // Map<String, HashMap<String, String>> valueLabelTable = new HashMap<String, HashMap<String, String>>(); // mFormatTable = new int[mVarQuantity]; // Okay. 
List<Integer> minimalTypeList = new ArrayList<Integer>(), normalTypeList = new ArrayList<Integer>(); Set<Integer> decimalVariableSet = new HashSet<Integer>(); int k = 0; for (String type : dataTypes) { VariableMetaData columnMetaData; // Log String variableName = variableNameList.get(k); // Convention is that integer is zero, right? if (type.equals("integer")) { minimalTypeList.add(0); normalTypeList.add(0); mFormatTable[k] = FORMAT_INTEGER; mPrintFormatList.add(1); // mPrintFormatNameTable.put(variableName, "N"); columnMetaData = new VariableMetaData(1); } // Double-precision data-types else if (type.equals("numeric") || type.equals("double")) { LOG.fine("RDATAfilereader: getVariableTypeList: double variable;"); minimalTypeList.add(1); normalTypeList.add(0); decimalVariableSet.add(k); mFormatTable[k] = FORMAT_NUMERIC; mPrintFormatList.add(1); columnMetaData = new VariableMetaData(1); } // If date else if (type.equals("Date")) { minimalTypeList.add(-1); normalTypeList.add(1); mFormatTable[k] = FORMAT_DATE; mPrintFormatList.add(0); mPrintFormatNameTable.put(variableName, "DATE10"); mFormatCategoryTable.put(variableName, "date"); columnMetaData = new VariableMetaData(0); LOG.fine("date variable detected. format: " + FORMAT_DATE); } else if (type.equals("POSIXct") || type.equals("POSIXlt") || type.equals("POSIXt")) { minimalTypeList.add(-1); normalTypeList.add(1); mFormatTable[k] = FORMAT_DATETIME; mPrintFormatList.add(0); mPrintFormatNameTable.put(variableName, "DATETIME23.3"); mFormatCategoryTable.put(variableName, "time"); columnMetaData = new VariableMetaData(0); LOG.fine("POSIXt variable detected. format: " + FORMAT_DATETIME); } else if (type.equals("factor")) { /* * This is the counter-intuitive part: in R, factors always have * internal integer values and character labels. However, we will * always treat them as character/string variables, i.e. 
on the DVN * side they will be ingested as string-type categorical variables * (with both the "value" and the "label" being the same string - the * R factor label). Yes, this means we are dropping the numeric value * completely. Why not do what we do in SPSS, i.e. use the numeric for * the value (and the TAB file entry)? - well, this is in fact a very * different case: in SPSS, a researcher creating a categorical variable * with numeric values would be hand-picking these numeric variables; * so we assume that the chosen values are in fact meaningful. If they * had some sort of a reason to assign 0 = "Male" and 7 = "Female", we * assume that they wanted to do this. So we use the numeric codes for * storage in the TAB file and for calculation of the UNF. In R however, * the user has no control over the internal numeric codes; they are * always created automatically and are in fact considered meaningless. * So we are going to assume that it is the actual values of the labels * that are meaningful. * -- L.A. 
* */ minimalTypeList.add(-1); normalTypeList.add(1); mFormatTable[k] = FORMAT_STRING; mPrintFormatList.add(0); mPrintFormatNameTable.put(variableName, "other"); mFormatCategoryTable.put(variableName, "other"); columnMetaData = new VariableMetaData(0); } else if (type.equals("logical")) { minimalTypeList.add(0); normalTypeList.add(0); mFormatTable[k] = FORMAT_INTEGER; mPrintFormatList.add(1); // mPrintFormatNameTable.put(variableName, "N"); columnMetaData = new VariableMetaData(1); columnMetaData.setBoolean(true); // Everything else is a string } else { minimalTypeList.add(-1); normalTypeList.add(1); mFormatTable[k] = FORMAT_STRING; mPrintFormatList.add(0); mPrintFormatNameTable.put(variableName, "other"); mFormatCategoryTable.put(variableName, "other"); columnMetaData = new VariableMetaData(0); } k++; } // Decimal Variables smd.setVariableTypeMinimal( ArrayUtils.toPrimitive(normalTypeList.toArray(new Integer[normalTypeList.size()]))); smd.setDecimalVariables(decimalVariableSet); smd.setVariableStorageType(null); smd.setVariableFormat(mPrintFormatList); smd.setVariableFormatName(mPrintFormatNameTable); smd.setVariableFormatCategory(mFormatCategoryTable); // smd.set LOG.fine("minimalTypeList = " + Arrays.deepToString(minimalTypeList.toArray())); LOG.fine("normalTypeList = " + Arrays.deepToString(normalTypeList.toArray())); LOG.fine("decimalVariableSet = " + Arrays.deepToString(decimalVariableSet.toArray())); LOG.fine("mPrintFormatList = " + mPrintFormatList); LOG.fine("mPrintFormatNameTable = " + mPrintFormatNameTable); LOG.fine("mFormatCategoryTable = " + mFormatCategoryTable); LOG.fine("mFormatTable = " + mFormatTable); // Return the variable type list return minimalTypeList; }
From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.rdata.RDATAFileReader.java
/** * Create UNF from Tabular File//ww w . j a v a 2 s . c o m * This methods iterates through each column of the supplied data table and * invoked the * @param DataTable table a rectangular data table * @return void */ private void createUNF(DataTable table) throws IOException { List<Integer> variableTypeList = getVariableTypeList(mDataTypes); String[] dateFormats = new String[mCaseQuantity]; String[] unfValues = new String[mVarQuantity]; String fileUNFvalue = null; // Set variable types // smd.setVariableTypeMinimal(ArrayUtils.toPrimitive(variableTypeList.toArray(new Integer[variableTypeList.size()]))); int[] x = ArrayUtils.toPrimitive(variableTypeList.toArray(new Integer[variableTypeList.size()])); for (int k = 0; k < mVarQuantity; k++) { String unfValue, name = variableNameList.get(k); int varType = variableTypeList.get(k); Object[] varData = table.getData()[k]; LOG.fine(String.format("RDATAFileReader: Column \"%s\" = %s", name, Arrays.deepToString(varData))); try { switch (varType) { case 0: Long[] integerEntries = new Long[varData.length]; LOG.fine(k + ": " + name + " is numeric (integer)"); if (smd.isBooleanVariable()[k]) { // This is not a regular integer - but a boolean! LOG.fine(k + ": " + name + " is NOT a simple integer - it's a logical (boolean)!"); Boolean[] booleanEntries = new Boolean[varData.length]; for (int i = 0; i < varData.length; i++) { if (varData[i] == null || varData[i].equals("")) { // Missing Value: booleanEntries[i] = null; } else if (((String) varData[i]).equals("0")) { booleanEntries[i] = false; } else if (((String) varData[i]).equals("1")) { booleanEntries[i] = true; } else { // Treat it as a missing value? booleanEntries[i] = null; // TODO: // Should we throw an exception here instead? 
} // We'll also need the integer values, to calculate // the summary statistics: try { integerEntries[i] = new Long((String) varData[i]); } catch (Exception ex) { integerEntries[i] = null; } } unfValue = UNF5Util.calculateUNF(booleanEntries); // TODO: // we've never calculated UNFs for Booleans before - // need to QA and verify that the values produced are correct. // -- L.A. } else { // Regular integer; // Treat it as an array of Longs: LOG.fine(k + ": " + name + " is a simple integer."); for (int i = 0; i < varData.length; i++) { try { integerEntries[i] = new Long((String) varData[i]); } catch (Exception ex) { integerEntries[i] = null; } } unfValue = UNF5Util.calculateUNF(integerEntries); // UNF5Util.cal } // Summary/category statistics smd.getSummaryStatisticsTable().put(k, ArrayUtils.toObject(StatHelper.calculateSummaryStatistics(integerEntries))); Map<String, Integer> catStat = StatHelper.calculateCategoryStatistics(integerEntries); smd.getCategoryStatisticsTable().put(variableNameList.get(k), catStat); smd.getNullValueCounts().put(variableNameList.get(k), StatHelper.countNullValues(integerEntries)); break; // If double case 1: LOG.fine(k + ": " + name + " is numeric (double)"); // Convert array of Strings to array of Doubles Double[] doubleEntries = new Double[varData.length]; for (int i = 0; i < varData.length; i++) { try { // Check for the special case of "NaN" - this is the R and DVN // notation for the "Not A Number" value: if (varData[i] != null && ((String) varData[i]).equals("NaN")) { doubleEntries[i] = Double.NaN; // "Inf" is another special case, notation for infinity, // positive and negative: } else if (varData[i] != null && (((String) varData[i]).equals("Inf") || ((String) varData[i]).equals("+Inf"))) { doubleEntries[i] = Double.POSITIVE_INFINITY; } else if (varData[i] != null && ((String) varData[i]).equals("-Inf")) { doubleEntries[i] = Double.NEGATIVE_INFINITY; } else { // Missing Values don't need to be treated separately; these // are 
represented as empty strings in the TAB file; so // attempting to create a Double object from one will // throw an exception - which we are going to intercept // below. For the UNF and Summary Stats purposes, missing // values are represented as NULLs. doubleEntries[i] = new Double((String) varData[i]); } } catch (Exception ex) { LOG.fine(k + ": " + name + " dropping value " + (String) varData[i] + " (" + i + "); replacing with null"); doubleEntries[i] = null; } } LOG.fine("sumstat:double case=" + Arrays.deepToString(ArrayUtils .toObject(StatHelper.calculateSummaryStatisticsContDistSample(doubleEntries)))); // Save summary statistics: smd.getSummaryStatisticsTable().put(k, ArrayUtils .toObject(StatHelper.calculateSummaryStatisticsContDistSample(doubleEntries))); unfValue = UNF5Util.calculateUNF(doubleEntries); break; case -1: LOG.fine(k + ": " + name + " is string"); String[] stringEntries = new String[varData.length];//Arrays.asList(varData).toArray(new String[varData.length]); LOG.fine("string array passed to calculateUNF: " + Arrays.deepToString(stringEntries)); // if (mFormatTable[k] == FORMAT_DATE || mFormatTable[k] == FORMAT_DATETIME) { DateFormatter dateFormatter = new DateFormatter(); dateFormatter.setDateFormats(DATE_FORMATS); dateFormatter.setTimeFormats(TIME_FORMATS); for (int i = 0; i < varData.length; i++) { DateWithFormatter entryDateWithFormat; // If data is missing, treat this entry as just that - // a missing value. 
Just like for all the other data types, // this is represented by a null: if (dateFormats[i] != null && (varData[i].equals("") || varData[i].equals(" "))) { stringEntries[i] = dateFormats[i] = null; } else { entryDateWithFormat = dateFormatter.getDateWithFormat((String) varData[i]); if (entryDateWithFormat == null) { LOG.fine("ATTENTION: the supplied date/time string could not be parsed (" + (String) varData[i]); throw new IOException( "Could not parse supplied date/time string: " + (String) varData[i]); } // Otherwise get the pattern // entryDateWithFormat = dateFormatter.getDateWithFormat(stringEntries[i]); stringEntries[i] = (String) varData[i]; dateFormats[i] = entryDateWithFormat.getFormatter().toPattern(); } } // Compute UNF try { LOG.fine("RDATAFileReader: strdata = " + Arrays.deepToString(stringEntries)); LOG.fine("RDATAFileReader: dateFormats = " + Arrays.deepToString(dateFormats)); unfValue = UNF5Util.calculateUNF(stringEntries, dateFormats); } catch (Exception ex) { LOG.warning("RDATAFileReader: UNF for variable " + name + " could not be computed!"); //unfValue = UNF5Util.calculateUNF(stringEntries); //ex.printStackTrace(); throw ex; } } else { for (int i = 0; i < varData.length; i++) { if (varData[i] == null) { // Missing Value stringEntries[i] = null; } else { stringEntries[i] = (String) varData[i]; } } unfValue = UNF5Util.calculateUNF(stringEntries); } smd.getSummaryStatisticsTable().put(k, StatHelper.calculateSummaryStatistics(stringEntries)); Map<String, Integer> StrCatStat = StatHelper.calculateCategoryStatistics(stringEntries); smd.getCategoryStatisticsTable().put(variableNameList.get(k), StrCatStat); smd.getNullValueCounts().put(variableNameList.get(k), StatHelper.countNullValues(stringEntries)); break; default: unfValue = null; } //LOG.fine(String.format("RDATAFileReader: Column \"%s\" (UNF) = %s", name, unfValue)); // Store UNF value unfValues[k] = unfValue; } catch (Exception ex) { LOG.fine("Exception caught while calculating UNF! 
" + ex.getMessage()); ex.printStackTrace(); throw new IOException("Exception caught while calculating UNF! " + ex.getMessage()); } LOG.fine(String.format("RDATAFileReader: Column \"%s\" (UNF) = %s", name, unfValues[k])); } try { fileUNFvalue = UNF5Util.calculateUNF(unfValues); } catch (Exception ex) { ex.printStackTrace(); LOG.fine("Exception caught while calculating the combined UNF for the data set! " + ex.getMessage()); throw new IOException( "Exception caught while calculating the combined UNF for the data set! " + ex.getMessage()); } mCsvDataTable.setUnf(unfValues); mCsvDataTable.setFileUnf(fileUNFvalue); // Set meta-data to make it look like a SAV file // smd.setVariableStorageType(null); // smd.setDecimalVariables(mDecimalVariableSet); boolean[] b = smd.isContinuousVariable(); for (int k = 0; k < b.length; k++) { String s = b[k] ? "True" : "False"; LOG.fine(k + " = " + s); } smd.setVariableUNF(unfValues); smd.getFileInformation().put("fileUNF", fileUNFvalue); }
From source file:org.broadinstitute.gatk.utils.commandline.ParsingEngine.java
/**
 * Builds one message line per argument match, each stating that the argument
 * has too many values and listing those values.
 *
 * @param arguments the argument matches to report
 * @return the concatenated message lines; empty if {@code arguments} is empty
 */
private static String formatArguments(Collection<ArgumentMatch> arguments) {
    final StringBuilder report = new StringBuilder();
    for (final ArgumentMatch match : arguments) {
        final String renderedValues = Arrays.deepToString(match.values().toArray());
        final String line = String.format("%nArgument '%s' has too many values: %s.", match.label,
                renderedValues);
        report.append(line);
    }
    return report.toString();
}
From source file:jeplus.JEPlusProject.java
public String[][] getLHSJobList(int LHSsize, Random randomsrc) { if (randomsrc == null) randomsrc = RandomSource.getRandomGenerator(); String[][] JobList = new String[LHSsize][]; // Get all parameters (inc. idf and weather) and their distributions if (ParamTree != null) { // Create sample for each parameter String[][] SampledValues = getSampleInEqualProbSegments(LHSsize, randomsrc); // debug//from w w w .j av a 2 s. com logger.debug(Arrays.deepToString(SampledValues)); // int length = SampledValues.length; // Shuffle the sample value vector of each parameter for (int i = 1; i < length; i++) { Collections.shuffle(Arrays.asList(SampledValues[i]), randomsrc); } // n jobs are created by taking a value from each parameter's vector // sequentially for (int i = 0; i < LHSsize; i++) { JobList[i] = new String[length]; JobList[i][0] = new Formatter().format("LHS-%06d", i).toString(); // Job id for (int j = 1; j < length; j++) { JobList[i][j] = SampledValues[j][i]; } } return JobList; } return null; }
From source file:org.apache.sqoop.connector.idf.TestCSVIntermediateDataFormat.java
@Test public void testArrayOfObjectsWithCSVTextInObjectArrayOut() { Schema schema = new Schema("test"); schema.addColumn(new org.apache.sqoop.schema.type.Array("1", new org.apache.sqoop.schema.type.Array("array", new FixedPoint("ft", 2L, false)))); schema.addColumn(new org.apache.sqoop.schema.type.Text("2")); dataFormat = new CSVIntermediateDataFormat(schema); Object[] givenArrayOne = { 11, 12 }; Object[] givenArrayTwo = { 14, 15 }; Object[] arrayOfArrays = new Object[2]; arrayOfArrays[0] = givenArrayOne;// ww w. ja va2 s .c o m arrayOfArrays[1] = givenArrayTwo; // create an array inside the object array Object[] data = new Object[2]; data[0] = arrayOfArrays; data[1] = "text"; dataFormat.setCSVTextData("'[\"[11, 12]\",\"[14, 15]\"]','text'"); Object[] expectedArray = (Object[]) dataFormat.getObjectData()[0]; assertEquals(2, expectedArray.length); assertEquals(Arrays.deepToString(arrayOfArrays), Arrays.toString(expectedArray)); assertEquals("text", dataFormat.getObjectData()[1]); }
From source file:com.yunguchang.data.ApplicationRepository.java
/**
 * Configures {@code overlapScheduleCarSubQuery} to select the car schedules that overlap the
 * combined time window of the given applications.
 *
 * <p>The window runs from the earliest begin time to the latest end time over all
 * applications. A schedule matches when its start or its end lies inside the window, or when
 * it starts before and ends after the window. Only schedules whose status is
 * {@code ScheduleStatus.AWAITING} are considered. The match is correlated to {@code carRoot}
 * and, when {@code driverRoot} is non-null, to {@code driverRoot} as well.
 *
 * @param overlapScheduleCarSubQuery subquery to configure; it is also the return value
 * @param applicationIds             ids of the applications that define the time window
 * @param carRoot                    outer-query car root the subquery is correlated with
 * @param driverRoot                 outer-query driver root, or {@code null} to correlate on
 *                                   the car only
 * @param cb                         criteria builder used to build the predicates
 * @param principalExt               principal passed through to {@code getApplicationById}
 * @return the configured {@code overlapScheduleCarSubQuery}
 */
private Subquery<TBusScheduleCarEntity> applyOverlapScheduleCarSubquery(
        Subquery<TBusScheduleCarEntity> overlapScheduleCarSubQuery, String[] applicationIds,
        Root<TAzCarinfoEntity> carRoot, Root<TRsDriverinfoEntity> driverRoot, CriteriaBuilder cb,
        PrincipalExt principalExt) {
    Root<TBusScheduleCarEntity> subScheduleCarRoot = overlapScheduleCarSubQuery
            .from(TBusScheduleCarEntity.class);
    overlapScheduleCarSubQuery.select(subScheduleCarRoot);

    Path<DateTime> scheduleStartTime = subScheduleCarRoot.get(TBusScheduleCarEntity_.schedule)
            .get(TBusScheduleRelaEntity_.starttime);
    Path<DateTime> scheduleEndTime = subScheduleCarRoot.get(TBusScheduleCarEntity_.schedule)
            .get(TBusScheduleRelaEntity_.endtime);

    // Widen the window so it spans every application: earliest begin .. latest end.
    DateTime applicationStartTime = null;
    DateTime applicationEndTime = null;
    for (String applicationId : applicationIds) {
        TBusApplyinfoEntity applicationEntity = getApplicationById(applicationId, principalExt);
        if (applicationEntity == null) {
            throw logger.entityNotFound(TBusApplyinfoEntity.class, applicationId);
        }
        // Compare against the current START bound. The previous code compared the END bound
        // here, which produced a wrong start time for multiple applications and could
        // dereference a null end time.
        if (applicationStartTime == null
                || applicationStartTime.isAfter(applicationEntity.getBegintime())) {
            applicationStartTime = applicationEntity.getBegintime();
        }
        if (applicationEndTime == null
                || applicationEndTime.isBefore(applicationEntity.getEndtime())) {
            applicationEndTime = applicationEntity.getEndtime();
        }
    }
    // No ids given, or the applications carried no usable times.
    if (applicationStartTime == null || applicationEndTime == null) {
        throw logger.invalidApplication(Arrays.deepToString(applicationIds));
    }

    Predicate predicate = cb.and(
            cb.or(
                    // schedule starts inside the window
                    cb.and(cb.between(scheduleStartTime, applicationStartTime, applicationEndTime)),
                    // schedule ends inside the window
                    cb.and(cb.between(scheduleEndTime, applicationStartTime, applicationEndTime)),
                    // schedule completely covers the window
                    cb.and(cb.lessThan(scheduleStartTime, applicationStartTime),
                            cb.greaterThan(scheduleEndTime, applicationEndTime))),
            subScheduleCarRoot.get(TBusScheduleCarEntity_.status).in(ScheduleStatus.AWAITING.id()));

    if (driverRoot != null) {
        predicate = cb.and(predicate,
                cb.and(cb.equal(subScheduleCarRoot.get(TBusScheduleCarEntity_.car), carRoot),
                        cb.equal(subScheduleCarRoot.get(TBusScheduleCarEntity_.driver), driverRoot)));
    } else {
        predicate = cb.and(predicate,
                cb.equal(subScheduleCarRoot.get(TBusScheduleCarEntity_.car), carRoot));
    }

    overlapScheduleCarSubQuery.where(predicate);
    return overlapScheduleCarSubQuery;
}
From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.spss.SPSSFileReader.java
private String getUNF(Object[] varData, String[] dateFormats, int variableType, String unfVersionNumber, int variablePosition) throws NumberFormatException, UnfException, IOException, NoSuchAlgorithmException { String unfValue = null;/*w ww.j a v a 2 s .c om*/ dbgLog.fine("variableType=" + variableType); dbgLog.finer("unfVersionNumber=" + unfVersionNumber); dbgLog.fine("variablePosition=" + variablePosition); dbgLog.fine("variableName=" + variableNameList.get(variablePosition)); switch (variableType) { case 0: // Integer (Long): dbgLog.fine("Integer case"); // Convert array of Strings to array of Longs Long[] ldata = new Long[varData.length]; for (int i = 0; i < varData.length; i++) { //if (varData[i] != null) { try { ldata[i] = new Long((String) varData[i]); } catch (Exception ex) { ldata[i] = null; } //} } unfValue = UNF5Util.calculateUNF(ldata); dbgLog.finer("integer:unfValue=" + unfValue); //dbgLog.finer("sumstat:long case=" + Arrays.deepToString( // ArrayUtils.toObject(StatHelper.calculateSummaryStatistics(ldata)))); smd.getSummaryStatisticsTable().put(variablePosition, ArrayUtils.toObject(StatHelper.calculateSummaryStatistics(ldata))); Map<String, Integer> catStat = StatHelper.calculateCategoryStatistics(ldata); smd.getCategoryStatisticsTable().put(variableNameList.get(variablePosition), catStat); break; case 1: // Double: dbgLog.finer("double case"); // Convert array of Strings to array of Doubles Double[] ddata = new Double[varData.length]; for (int i = 0; i < varData.length; i++) { //if (varData[i]!=null) { try { ddata[i] = new Double((String) varData[i]); } catch (Exception ex) { ddata[i] = null; } //} } unfValue = UNF5Util.calculateUNF(ddata); dbgLog.finer("double:unfValue=" + unfValue); smd.getSummaryStatisticsTable().put(variablePosition, ArrayUtils.toObject(StatHelper.calculateSummaryStatisticsContDistSample(ddata))); break; case -1: // String: // // i.e., this is something *stored* as string; it may still be // a more complex data type than just a 
string of characters. // Namely, it can be some date or time type that we support. // These should be handled differently when calculating the // UNFs. dbgLog.finer("string case"); String[] strdata = Arrays.asList(varData).toArray(new String[varData.length]); dbgLog.finer("string array passed to calculateUNF: " + Arrays.deepToString(strdata)); if (dateFormats != null) { for (int i = 0; i < varData.length; i++) { if (dateFormats[i] != null && (strdata[i].equals("") || strdata[i].equals(" "))) { strdata[i] = null; dateFormats[i] = null; } } unfValue = UNF5Util.calculateUNF(strdata, dateFormats); } else { unfValue = UNF5Util.calculateUNF(strdata); } dbgLog.finer("string:unfValue=" + unfValue); smd.getSummaryStatisticsTable().put(variablePosition, StatHelper.calculateSummaryStatistics(strdata)); Map<String, Integer> StrCatStat = StatHelper.calculateCategoryStatistics(strdata); //out.println("catStat="+StrCatStat); smd.getCategoryStatisticsTable().put(variableNameList.get(variablePosition), StrCatStat); break; default: dbgLog.fine("unknown variable type found"); String errorMessage = "unknow variable Type found at varData section"; throw new IllegalArgumentException(errorMessage); } // switch dbgLog.fine("unfvalue(last)=" + unfValue); return unfValue; }
From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.ddi.DDIFileReader.java
private String getUNF(Object[] varData, String[] dateFormats, int variableType, String unfVersionNumber, int variablePosition) throws NumberFormatException, UnfException, IOException, NoSuchAlgorithmException { String unfValue = null;/*from ww w. j a v a 2s .c om*/ dbgLog.fine("variableType=" + variableType); dbgLog.finer("unfVersionNumber=" + unfVersionNumber); dbgLog.fine("variablePosition=" + variablePosition); //dbgLog.fine("variableName="+variableNameList.get(variablePosition)); switch (variableType) { case 0: // Integer (Long): dbgLog.fine("Integer case"); // Convert array of Strings to array of Longs Long[] ldata = new Long[varData.length]; for (int i = 0; i < varData.length; i++) { //if (varData[i] != null) { try { ldata[i] = new Long((String) varData[i]); } catch (Exception ex) { ldata[i] = null; } //} } unfValue = UNF5Util.calculateUNF(ldata); dbgLog.finer("integer:unfValue=" + unfValue); //dbgLog.finer("sumstat:long case=" + Arrays.deepToString( // ArrayUtils.toObject(StatHelper.calculateSummaryStatistics(ldata)))); smd.getSummaryStatisticsTable().put(variablePosition, ArrayUtils.toObject(StatHelper.calculateSummaryStatistics(ldata))); Map<String, Integer> catStat = StatHelper.calculateCategoryStatistics(ldata); smd.getCategoryStatisticsTable().put(variableNameList.get(variablePosition), catStat); break; case 1: // Double: dbgLog.finer("double case"); // Convert array of Strings to array of Doubles Double[] ddata = new Double[varData.length]; for (int i = 0; i < varData.length; i++) { //if (varData[i]!=null) { try { ddata[i] = new Double((String) varData[i]); } catch (Exception ex) { ddata[i] = null; } //} } unfValue = UNF5Util.calculateUNF(ddata); dbgLog.finer("double:unfValue=" + unfValue); smd.getSummaryStatisticsTable().put(variablePosition, ArrayUtils.toObject(StatHelper.calculateSummaryStatisticsContDistSample(ddata))); break; case -1: // String: // // i.e., this is something *stored* as string; it may still be // a more complex data type than just 
a string of characters. // Namely, it can be some date or time type that we support. // These should be handled differently when calculating the // UNFs. dbgLog.finer("string case"); String[] strdata = Arrays.asList(varData).toArray(new String[varData.length]); dbgLog.finer("string array passed to calculateUNF: " + Arrays.deepToString(strdata)); if (dateFormats != null) { for (int i = 0; i < varData.length; i++) { if (dateFormats[i] != null && (strdata[i].equals("") || strdata[i].equals(" "))) { strdata[i] = null; dateFormats[i] = null; } } unfValue = UNF5Util.calculateUNF(strdata, dateFormats); } else { unfValue = UNF5Util.calculateUNF(strdata); } dbgLog.finer("string:unfValue=" + unfValue); smd.getSummaryStatisticsTable().put(variablePosition, StatHelper.calculateSummaryStatistics(strdata)); Map<String, Integer> StrCatStat = StatHelper.calculateCategoryStatistics(strdata); //out.println("catStat="+StrCatStat); smd.getCategoryStatisticsTable().put(variableNameList.get(variablePosition), StrCatStat); break; default: dbgLog.fine("unknown variable type found"); String errorMessage = "unknow variable Type found at varData section"; throw new IllegalArgumentException(errorMessage); } // switch dbgLog.fine("unfvalue(last)=" + unfValue); return unfValue; }