List of usage examples for java.util Arrays deepToString
public static String deepToString(Object[] a)
From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.dta.DTAFileReader.java
private void decodeData(BufferedInputStream stream) throws IOException { dbgLog.fine("\n***** decodeData(): start *****"); if (stream == null) { throw new IllegalArgumentException("stream == null!"); }/*w w w. j a va 2s .c o m*/ int nvar = (Integer) smd.getFileInformation().get("varQnty"); int nobs = (Integer) smd.getFileInformation().get("caseQnty"); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("data diminsion[rxc]=(" + nobs + "," + nvar + ")"); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("bytes per row=" + bytes_per_row + " bytes"); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("variableTypelList=" + Arrays.deepToString(variableTypelList)); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("StringVariableTable=" + StringVariableTable); FileOutputStream fileOutTab = null; PrintWriter pwout = null; // create a File object to save the tab-delimited data file File tabDelimitedDataFile = File.createTempFile("tempTabfile.", ".tab"); String tabDelimitedDataFileName = tabDelimitedDataFile.getAbsolutePath(); // save the temp file name in the metadata object smd.getFileInformation().put("tabDelimitedDataFileLocation", tabDelimitedDataFileName); fileOutTab = new FileOutputStream(tabDelimitedDataFile); pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true); // data storage // Object[][] dataTable = new Object[nobs][nvar]; // for later variable-wise calculations of statistics // dataTable2 sotres cut-out data columnwise Object[][] dataTable2 = new Object[nvar][nobs]; String[][] dateFormat = new String[nvar][nobs]; for (int i = 0; i < nobs; i++) { byte[] dataRowBytes = new byte[bytes_per_row]; Object[] dataRow = new Object[nvar]; int nbytes = stream.read(dataRowBytes, 0, bytes_per_row); if (nbytes == 0) { String errorMessage = "reading data: no data were read at(" + i + "th row)"; throw new IOException(errorMessage); } // decoding each row int byte_offset = 0; for (int columnCounter = 0; columnCounter < variableTypelList.length; columnCounter++) { Integer 
varType = variableTypeMap.get(variableTypelList[columnCounter]); String variableFormat = variableFormats[columnCounter]; boolean isDateTimeDatum = isDateTimeDatumList[columnCounter]; switch (varType != null ? varType : 256) { case -5: // Byte case // note: 1 byte signed byte byte_datum = dataRowBytes[byte_offset]; if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column byte =" + byte_datum); if (byte_datum >= BYTE_MISSING_VALUE) { if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column byte MV=" + byte_datum); dataRow[columnCounter] = MissingValueForTextDataFileNumeric; dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF } else { dataRow[columnCounter] = byte_datum; dataTable2[columnCounter][i] = byte_datum; } byte_offset++; break; case -4: // Stata-int (=java's short: 2byte) case // note: 2-byte signed int, not java's int ByteBuffer int_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 2); if (isLittleEndian) { int_buffer.order(ByteOrder.LITTLE_ENDIAN); } short short_datum = int_buffer.getShort(); if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column stata int =" + short_datum); if (short_datum >= INT_MISSIG_VALUE) { if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column stata long missing value=" + short_datum); dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF if (isDateTimeDatum) { dataRow[columnCounter] = MissingValueForTextDataFileString; } else { dataRow[columnCounter] = MissingValueForTextDataFileNumeric; } } else { if (isDateTimeDatum) { DecodedDateTime ddt = decodeDateTimeData("short", variableFormat, Short.toString(short_datum)); if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format); 
dataRow[columnCounter] = ddt.decodedDateTime; dateFormat[columnCounter][i] = ddt.format; dataTable2[columnCounter][i] = dataRow[columnCounter]; } else { dataTable2[columnCounter][i] = short_datum; dataRow[columnCounter] = short_datum; } } byte_offset += 2; break; case -3: // stata-Long (= java's int: 4 byte) case // note: 4-byte singed, not java's long dbgLog.fine("DATreader: stata long"); ByteBuffer long_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 4); if (isLittleEndian) { long_buffer.order(ByteOrder.LITTLE_ENDIAN); } int int_datum = long_buffer.getInt(); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine(i + "-th row " + columnCounter + "=th column stata long =" + int_datum); if (int_datum >= LONG_MISSING_VALUE) { if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine(i + "-th row " + columnCounter + "=th column stata long missing value=" + int_datum); dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF if (isDateTimeDatum) { dataRow[columnCounter] = MissingValueForTextDataFileString; } else { dataRow[columnCounter] = MissingValueForTextDataFileNumeric; } } else { if (isDateTimeDatum) { DecodedDateTime ddt = decodeDateTimeData("int", variableFormat, Integer.toString(int_datum)); if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format); dataRow[columnCounter] = ddt.decodedDateTime; dateFormat[columnCounter][i] = ddt.format; dataTable2[columnCounter][i] = dataRow[columnCounter]; } else { dataTable2[columnCounter][i] = int_datum; dataRow[columnCounter] = int_datum; } } byte_offset += 4; break; case -2: // float case // note: 4-byte ByteBuffer float_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 4); if (isLittleEndian) { float_buffer.order(ByteOrder.LITTLE_ENDIAN); } float float_datum = float_buffer.getFloat(); if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column float =" + float_datum); if 
(FLOAT_MISSING_VALUE_SET.contains(float_datum)) { if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column float missing value=" + float_datum); dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF if (isDateTimeDatum) { dataRow[columnCounter] = MissingValueForTextDataFileString; } else { dataRow[columnCounter] = MissingValueForTextDataFileNumeric; } } else { if (isDateTimeDatum) { DecodedDateTime ddt = decodeDateTimeData("float", variableFormat, doubleNumberFormatter.format(float_datum)); if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format); dataRow[columnCounter] = ddt.decodedDateTime; dateFormat[columnCounter][i] = ddt.format; dataTable2[columnCounter][i] = dataRow[columnCounter]; } else { dataTable2[columnCounter][i] = float_datum; dataRow[columnCounter] = float_datum; } } byte_offset += 4; break; case -1: // double case // note: 8-byte ByteBuffer double_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 8); if (isLittleEndian) { double_buffer.order(ByteOrder.LITTLE_ENDIAN); } double double_datum = double_buffer.getDouble(); if (DOUBLE_MISSING_VALUE_SET.contains(double_datum)) { dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column double missing value=" + double_datum); if (isDateTimeDatum) { dataRow[columnCounter] = MissingValueForTextDataFileString; } else { dataRow[columnCounter] = MissingValueForTextDataFileNumeric; } } else { if (isDateTimeDatum) { DecodedDateTime ddt = decodeDateTimeData("double", variableFormat, doubleNumberFormatter.format(double_datum)); if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format); dataRow[columnCounter] = 
ddt.decodedDateTime; dateFormat[columnCounter][i] = ddt.format; dataTable2[columnCounter][i] = dataRow[columnCounter]; } else { dataTable2[columnCounter][i] = double_datum; dataRow[columnCounter] = doubleNumberFormatter.format(double_datum); } } byte_offset += 8; break; case 0: // String case int strVarLength = StringVariableTable.get(columnCounter); String raw_datum = new String( Arrays.copyOfRange(dataRowBytes, byte_offset, (byte_offset + strVarLength)), "ISO-8859-1"); String string_datum = getNullStrippedString(raw_datum); if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column string =" + string_datum); if (string_datum.equals("")) { if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column string missing value=" + string_datum); dataRow[columnCounter] = MissingValueForTextDataFileString; dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF } else { String escapedString = string_datum.replaceAll("\"", Matcher.quoteReplacement("\\\"")); /* * Fixing the bug we've had in the Stata reader for * a longest time: new lines and tabs need to * be escaped too - otherwise it breaks our * TAB file structure! -- L.A. 
*/ escapedString = escapedString.replaceAll("\t", Matcher.quoteReplacement("\\t")); escapedString = escapedString.replaceAll("\n", Matcher.quoteReplacement("\\n")); escapedString = escapedString.replaceAll("\r", Matcher.quoteReplacement("\\r")); // the escaped version of the string will be // stored in the tab file: dataRow[columnCounter] = "\"" + escapedString + "\""; // but note that the "raw" version of it is // used for the UNF: dataTable2[columnCounter][i] = string_datum; } byte_offset += strVarLength; break; default: dbgLog.fine("unknown variable type found"); String errorMessage = "unknow variable Type found at data section"; throw new InvalidObjectException(errorMessage); } // switch } // for-columnCounter // dump the row of data to the external file pwout.println(StringUtils.join(dataRow, "\t")); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine(i + "-th row's data={" + StringUtils.join(dataRow, ",") + "};"); } // for- i (row) pwout.close(); if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer("\ndataTable2(variable-wise):\n"); dbgLog.finer(Arrays.deepToString(dataTable2)); dbgLog.finer("\ndateFormat(variable-wise):\n"); dbgLog.finer(Arrays.deepToString(dateFormat)); } if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("variableTypelList:\n" + Arrays.deepToString(variableTypelList)); dbgLog.fine("variableTypelListFinal:\n" + Arrays.deepToString(variableTypelListFinal)); } String[] unfValues = new String[nvar]; for (int j = 0; j < nvar; j++) { String variableType_j = variableTypelListFinal[j]; unfValues[j] = getUNF(dataTable2[j], dateFormat[j], variableType_j, unfVersionNumber, j); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine(j + "th unf value" + unfValues[j]); } if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("unf set:\n" + Arrays.deepToString(unfValues)); fileUnfValue = UNF5Util.calculateUNF(unfValues); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("file-unf=" + fileUnfValue); stataDataSection.setUnf(unfValues); stataDataSection.setFileUnf(fileUnfValue); 
smd.setVariableUNF(unfValues); smd.getFileInformation().put("fileUNF", fileUnfValue); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("unf values:\n" + unfValues); stataDataSection.setData(dataTable2); // close the stream dbgLog.fine("***** decodeData(): end *****\n\n"); }
From source file:org.freedesktop.dbus.Message.java
/** * Demarshall one value from a buffer./*w ww. jav a 2 s .co m*/ * * @param sigb * A buffer of the D-Bus signature. * @param buf * The buffer to demarshall from. * @param ofs * An array of two ints, the offset into the signature buffer * and the offset into the data buffer. These values will be * updated to the start of the next value ofter demarshalling. * @param contained * converts nested arrays to Lists * @return The demarshalled value. */ private Object extractone(byte[] sigb, byte[] buf, int[] ofs, boolean contained) throws DBusException { if (log.isTraceEnabled()) { log.trace("Extracting type: " + ((char) sigb[ofs[0]]) + " from offset " + ofs[1]); } Object rv = null; ofs[1] = align(ofs[1], sigb[ofs[0]]); switch (sigb[ofs[0]]) { case ArgumentType.BYTE: rv = buf[ofs[1]++]; break; case ArgumentType.UINT32: rv = new UInt32(demarshallint(buf, ofs[1], 4)); ofs[1] += 4; break; case ArgumentType.INT32: rv = (int) demarshallint(buf, ofs[1], 4); ofs[1] += 4; break; case ArgumentType.INT16: rv = (short) demarshallint(buf, ofs[1], 2); ofs[1] += 2; break; case ArgumentType.UINT16: rv = new UInt16((int) demarshallint(buf, ofs[1], 2)); ofs[1] += 2; break; case ArgumentType.INT64: rv = demarshallint(buf, ofs[1], 8); ofs[1] += 8; break; case ArgumentType.UINT64: long top; long bottom; if (this.big) { top = demarshallint(buf, ofs[1], 4); ofs[1] += 4; bottom = demarshallint(buf, ofs[1], 4); } else { bottom = demarshallint(buf, ofs[1], 4); ofs[1] += 4; top = demarshallint(buf, ofs[1], 4); } rv = new UInt64(top, bottom); ofs[1] += 4; break; case ArgumentType.DOUBLE: long l = demarshallint(buf, ofs[1], 8); ofs[1] += 8; rv = Double.longBitsToDouble(l); break; case ArgumentType.FLOAT: int rf = (int) demarshallint(buf, ofs[1], 4); ofs[1] += 4; rv = Float.intBitsToFloat(rf); break; case ArgumentType.BOOLEAN: rf = (int) demarshallint(buf, ofs[1], 4); ofs[1] += 4; rv = (1 == rf) ? 
Boolean.TRUE : Boolean.FALSE; break; case ArgumentType.ARRAY: long size = demarshallint(buf, ofs[1], 4); if (log.isTraceEnabled()) { log.trace("Reading array of size: " + size); } ofs[1] += 4; byte algn = (byte) getAlignment(sigb[++ofs[0]]); ofs[1] = align(ofs[1], sigb[ofs[0]]); int length = (int) (size / algn); if (length > AbstractConnection.MAX_ARRAY_LENGTH) throw new MarshallingException("Arrays must not exceed " + AbstractConnection.MAX_ARRAY_LENGTH); // optimise primatives switch (sigb[ofs[0]]) { case ArgumentType.BYTE: rv = new byte[length]; System.arraycopy(buf, ofs[1], rv, 0, length); ofs[1] += size; break; case ArgumentType.INT16: rv = new short[length]; for (int j = 0; j < length; j++, ofs[1] += algn) ((short[]) rv)[j] = (short) demarshallint(buf, ofs[1], algn); break; case ArgumentType.INT32: rv = new int[length]; for (int j = 0; j < length; j++, ofs[1] += algn) ((int[]) rv)[j] = (int) demarshallint(buf, ofs[1], algn); break; case ArgumentType.INT64: rv = new long[length]; for (int j = 0; j < length; j++, ofs[1] += algn) ((long[]) rv)[j] = demarshallint(buf, ofs[1], algn); break; case ArgumentType.BOOLEAN: rv = new boolean[length]; for (int j = 0; j < length; j++, ofs[1] += algn) ((boolean[]) rv)[j] = (1 == demarshallint(buf, ofs[1], algn)); break; case ArgumentType.FLOAT: rv = new float[length]; for (int j = 0; j < length; j++, ofs[1] += algn) ((float[]) rv)[j] = Float.intBitsToFloat((int) demarshallint(buf, ofs[1], algn)); break; case ArgumentType.DOUBLE: rv = new double[length]; for (int j = 0; j < length; j++, ofs[1] += algn) ((double[]) rv)[j] = Double.longBitsToDouble(demarshallint(buf, ofs[1], algn)); break; case ArgumentType.DICT_ENTRY1: if (0 == size) { // advance the type parser even on 0-size arrays. Vector<Type> temp = new Vector<>(); byte[] temp2 = new byte[sigb.length - ofs[0]]; System.arraycopy(sigb, ofs[0], temp2, 0, temp2.length); String temp3 = new String(temp2); // ofs[0] gets incremented anyway. 
Leave one character on the stack int temp4 = Marshalling.getJavaType(temp3, temp, 1) - 1; ofs[0] += temp4; if (log.isTraceEnabled()) { log.trace("Aligned type: " + temp3 + " " + temp4 + " " + ofs[0]); } } int ofssave = ofs[0]; long end = ofs[1] + size; Vector<Object[]> entries = new Vector<>(); while (ofs[1] < end) { ofs[0] = ofssave; entries.add((Object[]) extractone(sigb, buf, ofs, true)); } rv = new DBusMap<>(entries.toArray(new Object[0][])); break; default: if (0 == size) { // advance the type parser even on 0-size arrays. Vector<Type> temp = new Vector<>(); byte[] temp2 = new byte[sigb.length - ofs[0]]; System.arraycopy(sigb, ofs[0], temp2, 0, temp2.length); String temp3 = new String(temp2); // ofs[0] gets incremented anyway. Leave one character on the stack int temp4 = Marshalling.getJavaType(temp3, temp, 1) - 1; ofs[0] += temp4; if (log.isTraceEnabled()) { log.trace("Aligned type: " + temp3 + " " + temp4 + " " + ofs[0]); } } ofssave = ofs[0]; end = ofs[1] + size; Vector<Object> contents = new Vector<>(); while (ofs[1] < end) { ofs[0] = ofssave; contents.add(extractone(sigb, buf, ofs, true)); } rv = contents; } if (contained && !(rv instanceof List) && !(rv instanceof Map)) rv = ArrayFrob.listify(rv); break; case ArgumentType.STRUCT1: Vector<Object> contents = new Vector<>(); while (sigb[++ofs[0]] != ArgumentType.STRUCT2) contents.add(extractone(sigb, buf, ofs, true)); rv = contents.toArray(); break; case ArgumentType.DICT_ENTRY1: Object[] decontents = new Object[2]; if (log.isTraceEnabled()) { Hex h = new Hex(); log.trace( "Extracting Dict Entry (" + h.encode(Arrays.copyOfRange(sigb, ofs[0], sigb.length - ofs[0])) + ") from: " + h.encode(Arrays.copyOfRange(buf, ofs[1], buf.length - ofs[1]))); } ofs[0]++; decontents[0] = extractone(sigb, buf, ofs, true); ofs[0]++; decontents[1] = extractone(sigb, buf, ofs, true); ofs[0]++; rv = decontents; break; case ArgumentType.VARIANT: int[] newofs = new int[] { 0, ofs[1] }; String sig = (String) 
extract(ArgumentType.SIGNATURE_STRING, buf, newofs)[0]; newofs[0] = 0; rv = new Variant<>(extract(sig, buf, newofs)[0], sig); ofs[1] = newofs[1]; break; case ArgumentType.STRING: length = (int) demarshallint(buf, ofs[1], 4); ofs[1] += 4; try { rv = new String(buf, ofs[1], length, "UTF-8"); } catch (UnsupportedEncodingException UEe) { throw new DBusException("System does not support UTF-8 encoding", UEe); } ofs[1] += length + 1; break; case ArgumentType.OBJECT_PATH: length = (int) demarshallint(buf, ofs[1], 4); ofs[1] += 4; rv = new ObjectPath(getSource(), new String(buf, ofs[1], length)); ofs[1] += length + 1; break; case ArgumentType.SIGNATURE: length = (buf[ofs[1]++] & 0xFF); rv = new String(buf, ofs[1], length); ofs[1] += length + 1; break; default: throw new UnknownTypeCodeException(sigb[ofs[0]]); } if (log.isDebugEnabled()) { if (rv instanceof Object[]) log.trace("Extracted: " + Arrays.deepToString((Object[]) rv) + " (now at " + ofs[1] + ")"); else log.trace("Extracted: " + rv + " (now at " + ofs[1] + ")"); } return rv; }
From source file:org.apache.cxf.systest.jaxrs.JAXRSClientServerBookTest.java
@Test public void testAddBookProxyResponse() { Book b = new Book("CXF rocks", 123L); System.out.println(Arrays.deepToString(Arrays.asList(b, b).toArray())); BookStore store = JAXRSClientFactory.create("http://localhost:" + PORT, BookStore.class); Response r = store.addBook(b); assertNotNull(r);//from w w w. ja v a 2 s .c o m InputStream is = (InputStream) r.getEntity(); assertNotNull(is); XMLSource source = new XMLSource(is); source.setBuffering(); assertEquals(124L, Long.parseLong(source.getValue("Book/id"))); assertEquals("CXF rocks", source.getValue("Book/name")); }
From source file:org.broadinstitute.sting.commandline.ArgumentTypeDescriptor.java
/**
 * Builds a type descriptor for a field annotated with {@code Multiplex}.
 *
 * <p>The annotation names the fields of {@code containingObject} that the
 * multiplexer depends on. Their types select the multiplexer's constructor and
 * their current values are passed to it.
 *
 * @param parsingEngine used to discover the argument sources of the containing class
 * @param dependentArgument the multiplexed argument being described
 * @param containingObject the object holding both the dependent and the source fields
 * @return a descriptor wrapping the new multiplexer and the result of its {@code multiplex()} call
 * @throws ReviewedStingException if a source field is missing or is itself multiplexed,
 *         or if the multiplexer cannot be found, accessed or instantiated
 */
public MultiplexArgumentTypeDescriptor createCustomTypeDescriptor(ParsingEngine parsingEngine,
        ArgumentSource dependentArgument, Object containingObject) {
    final Multiplex multiplexAnnotation = dependentArgument.field.getAnnotation(Multiplex.class);
    final String[] sourceFields = multiplexAnnotation.arguments();
    final List<ArgumentSource> candidates = parsingEngine.extractArgumentSources(containingObject.getClass());

    final Class[] constructorTypes = new Class[sourceFields.length];
    final Object[] constructorArgs = new Object[sourceFields.length];

    // "slot" advances once per matching source, which is not necessarily once per name
    int slot = 0;
    for (int n = 0; n < sourceFields.length; n++) {
        final String wantedName = sourceFields[n];
        boolean matched = false;
        for (ArgumentSource candidate : candidates) {
            if (candidate.field.getName().equals(wantedName)) {
                if (candidate.field.isAnnotationPresent(Multiplex.class)) {
                    throw new ReviewedStingException(
                            "Command-line arguments can only depend on independent fields");
                }
                constructorTypes[slot] = candidate.field.getType();
                constructorArgs[slot] = JVMUtils.getFieldValue(candidate.field, containingObject);
                slot++;
                matched = true;
            }
        }
        if (!matched) {
            throw new ReviewedStingException(
                    String.format("Unable to find source field %s, referred to by dependent field %s",
                            wantedName, dependentArgument.field.getName()));
        }
    }

    final Class<? extends Multiplexer> multiplexerType = multiplexAnnotation.value();

    // Constructor lookup and invocation share one try block; each failure mode
    // keeps its own message.
    final Multiplexer multiplexer;
    try {
        Constructor<? extends Multiplexer> constructor = multiplexerType.getConstructor(constructorTypes);
        constructor.setAccessible(true);
        multiplexer = constructor.newInstance(constructorArgs);
    } catch (NoSuchMethodException ex) {
        throw new ReviewedStingException(
                String.format("Unable to find constructor for class %s with parameters %s",
                        multiplexerType.getName(), Arrays.deepToString(sourceFields)),
                ex);
    } catch (IllegalAccessException ex) {
        throw new ReviewedStingException(
                String.format("Constructor for class %s with parameters %s is inaccessible",
                        multiplexerType.getName(), Arrays.deepToString(sourceFields)),
                ex);
    } catch (InstantiationException ex) {
        throw new ReviewedStingException(String.format("Can't create class %s with parameters %s",
                multiplexerType.getName(), Arrays.deepToString(sourceFields)), ex);
    } catch (InvocationTargetException ex) {
        throw new ReviewedStingException(
                String.format("Can't invoke constructor of class %s with parameters %s",
                        multiplexerType.getName(), Arrays.deepToString(sourceFields)),
                ex);
    }

    return new MultiplexArgumentTypeDescriptor(multiplexer, multiplexer.multiplex());
}
From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.ddi.DDIFileReader.java
private void calculateDatasetStats(DataTable csvData) { String fileUNFvalue = null;/* w w w. j a va 2 s . c o m*/ String[] unfValues = new String[getVarQnty()]; // TODO: // Catch and differentiate between different exception // that the UNF methods throw. for (int j = 0; j < getVarQnty(); j++) { int variableTypeNumer = unfVariableTypes.get(variableNameList.get(j)); String varFormat = smd.getVariableFormatName().get(smd.getVariableName()[j]); try { dbgLog.finer("j = " + j); // Before we pass the variable vector to the UNF calculator, // we need to check if is of any supported date/time type. // If so, we'll also need to create and pass a list of // date formats, so that the UNFs could be properly calculated. // (otherwise the date/time values will be treated simply as // strings!) if (varFormat != null && (varFormat.equals("WKDAY") || varFormat.equals("MONTH") || "date".equals(SPSSConstants.FORMAT_CATEGORY_TABLE.get(varFormat)) || "time".equals(SPSSConstants.FORMAT_CATEGORY_TABLE.get(varFormat)))) { // TODO: // All these date, time, weekday, etc. values need to be validated! String[] dateFormats = new String[getCaseQnty()]; for (int k = 0; k < getCaseQnty(); k++) { if (SPSSConstants.FORMAT_CATEGORY_TABLE.get(varFormat).equals("date")) { dbgLog.finer("date case"); dateFormats[k] = sdf_ymd.toPattern(); } else if (SPSSConstants.FORMAT_CATEGORY_TABLE.get(varFormat).equals("time")) { dbgLog.finer("time case: DTIME or DATETIME or TIME"); if (varFormat.equals("DTIME")) { dateFormats[k] = sdf_dhms.toPattern(); } else if (varFormat.equals("DATETIME")) { dateFormats[k] = sdf_ymdhms.toPattern(); } else if (varFormat.equals("TIME")) { dateFormats[k] = sdf_hms.toPattern(); } } else if (varFormat.equals("WKDAY")) { // TODO: these need to be validated only. dateFormats = null; } else if (varFormat.equals("MONTH")) { // TODO: these need to be validated only. 
dateFormats = null; } } unfValues[j] = getUNF(csvData.getData()[j], dateFormats, variableTypeNumer, unfVersionNumber, j); } else { unfValues[j] = getUNF(csvData.getData()[j], null, variableTypeNumer, unfVersionNumber, j); } dbgLog.fine(j + "th unf value" + unfValues[j]); } catch (NumberFormatException ex) { ex.printStackTrace(); } catch (UnfException ex) { ex.printStackTrace(); } catch (IOException ex) { ex.printStackTrace(); //throw ex; } catch (NoSuchAlgorithmException ex) { ex.printStackTrace(); } } dbgLog.fine("unf set:\n" + Arrays.deepToString(unfValues)); try { fileUNFvalue = UNF5Util.calculateUNF(unfValues); } catch (NumberFormatException ex) { ex.printStackTrace(); } catch (IOException ex) { ex.printStackTrace(); //throw ex; } // Set the UNFs we have calculated, the ones for the individual // variables and the file-level UNF: csvData.setUnf(unfValues); csvData.setFileUnf(fileUNFvalue); smd.setVariableUNF(unfValues); smd.getFileInformation().put("fileUNF", fileUNFvalue); dbgLog.fine("file-level unf value:\n" + fileUNFvalue); }
From source file:com.taobao.adfs.util.Utilities.java
public static String getPathInName(String[] names, int index) throws IOException { if (index >= names.length) index = names.length - 1;//from ww w . ja v a 2s. c om if (index < 0) index = 0; if (names.length == 0) throw new IOException("names.length is 0"); if (index == 0) return "/"; StringBuilder path = new StringBuilder(1024); for (int i = 1; i <= index; ++i) { if (names[i] == null) throw new IOException("names[" + i + "] is null, names=" + Arrays.deepToString(names)); path.append("/").append(names[i]); } return path.toString(); }
From source file:org.richfaces.tests.metamer.ftest.AbstractWebDriverTest.java
public void testRequestEventsAfter(final String... events) { Graphene.waitModel().until(new Predicate<WebDriver>() { private String actualEvents; private int lastNumberOfEvents; @Override//from ww w. j a v a 2 s. co m public boolean apply(WebDriver arg0) { actualEvents = ((String) executeJS("return sessionStorage.getItem(\"metamerEvents\")")); lastNumberOfEvents = (StringUtils.isBlank(actualEvents) ? 0 : actualEvents.split(" ").length); return lastNumberOfEvents == events.length; } @Override public String toString() { return format("number of events is equal to {0}, found {1}. Actual events: {2}", events.length, lastNumberOfEvents, actualEvents); } }); String[] actualEvents = ((String) executeJS("return sessionStorage.getItem(\"metamerEvents\")")).split(" "); assertEquals(actualEvents, events, String.format("The events (%s) don't came in right order (%s)", Arrays.deepToString(actualEvents), Arrays.deepToString(events))); }
From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.sav.SAVFileReader.java
void decodeRecordType2(BufferedInputStream stream) throws IOException { dbgLog.fine("***** decodeRecordType2(): start *****"); if (stream == null) { throw new IllegalArgumentException("stream == null!"); }//ww w .ja v a 2s .c o m Map<String, String> variableLabelMap = new LinkedHashMap<String, String>(); Map<String, List<String>> missingValueTable = new LinkedHashMap<String, List<String>>(); List<Integer> printFormatList = new ArrayList<Integer>(); String caseWeightVariableName = null; int caseWeightVariableIndex = 0; boolean lastVariableIsExtendable = false; boolean extendedVariableMode = false; boolean obs255 = false; String lastVariableName = null; String lastExtendedVariable = null; // this field repeats as many as the number of variables in // this sav file // (note that the above statement is not technically correct, this // record repeats not just for every variable in the file, but for // every OBS (8 byte unit); i.e., if a string is split into multiple // OBS units, each one will have its own RT2 record -- L.A.). // Each field constists of a fixed (32-byte) segment and // then a few variable segments: // if the variable has a label (3rd INT4 set to 1), then there's 4 more // bytes specifying the length of the label, and then that many bytes // holding the label itself (no more than 256). // Then if there are optional missing value units (4th INT4 set to 1) // there will be 3 more OBS units attached = 24 extra bytes. 
int variableCounter = 0; int obsSeqNumber = 0; int j; dbgLog.fine("RT2: Reading " + OBSUnitsPerCase + " OBS units."); for (j = 0; j < OBSUnitsPerCase; j++) { dbgLog.fine("RT2: \n\n+++++++++++ " + j + "-th RT2 unit is to be decoded +++++++++++"); // 2.0: read the fixed[=non-optional] 32-byte segment byte[] recordType2Fixed = new byte[LENGTH_RECORDTYPE2_FIXED]; try { int nbytes = stream.read(recordType2Fixed, 0, LENGTH_RECORDTYPE2_FIXED); //printHexDump(recordType2Fixed, "recordType2 part 1"); if (nbytes == 0) { throw new IOException("reading recordType2: no bytes read!"); } int offset = 0; // 2.1: create int-view of the bytebuffer for the first 16-byte segment int rt2_1st_4_units = 4; ByteBuffer[] bb_record_type2_fixed_part1 = new ByteBuffer[rt2_1st_4_units]; int[] recordType2FixedPart1 = new int[rt2_1st_4_units]; for (int i = 0; i < rt2_1st_4_units; i++) { bb_record_type2_fixed_part1[i] = ByteBuffer.wrap(recordType2Fixed, offset, LENGTH_SAV_INT_BLOCK); offset += LENGTH_SAV_INT_BLOCK; if (isLittleEndian) { bb_record_type2_fixed_part1[i].order(ByteOrder.LITTLE_ENDIAN); } recordType2FixedPart1[i] = bb_record_type2_fixed_part1[i].getInt(); } dbgLog.fine("recordType2FixedPart=" + ReflectionToStringBuilder.toString(recordType2FixedPart1, ToStringStyle.MULTI_LINE_STYLE)); // 1st ([0]) element must be 2 otherwise no longer Record Type 2 if (recordType2FixedPart1[0] != 2) { dbgLog.info(j + "-th RT header value is no longet RT2! " + recordType2FixedPart1[0]); break; //throw new IOException("RT2 reading error: The current position is no longer Record Type 2"); } dbgLog.fine("variable type[must be 2]=" + recordType2FixedPart1[0]); // 2.3 variable name: 8 byte(space[x20]-padded) // This field is located at the very end of the 32 byte // fixed-size RT2 header (bytes 24-31). 
// We are processing it now, so that // we can make the decision on whether this variable is part // of a compound variable: String RawVariableName = new String( Arrays.copyOfRange(recordType2Fixed, 24, (24 + LENGTH_VARIABLE_NAME)), defaultCharSet); //offset +=LENGTH_VARIABLE_NAME; String variableName = null; if (RawVariableName.indexOf(' ') >= 0) { variableName = RawVariableName.substring(0, RawVariableName.indexOf(' ')); } else { variableName = RawVariableName; } // 2nd ([1]) element: numeric variable = 0 :for string variable // this block indicates its datum-length, i.e, >0 ; // if -1, this RT2 unit is a non-1st RT2 unit for a string variable // whose value is longer than 8 character. boolean isNumericVariable = false; dbgLog.fine("variable type(0: numeric; > 0: String;-1 continue )=" + recordType2FixedPart1[1]); //OBSwiseTypelList.add(recordType2FixedPart1[1]); int HowManyRt2Units = 1; if (recordType2FixedPart1[1] == -1) { dbgLog.fine("this RT2 is an 8 bit continuation chunk of an earlier string variable"); if (obs255) { if (obsSeqNumber < 30) { OBSwiseTypelList.add(recordType2FixedPart1[1]); obsSeqNumber++; } else { OBSwiseTypelList.add(-2); obs255 = false; obsSeqNumber = 0; } } else { OBSwiseTypelList.add(recordType2FixedPart1[1]); } obsNonVariableBlockSet.add(j); continue; } else if (recordType2FixedPart1[1] == 0) { // This is a numeric variable extendedVariableMode = false; // And as such, it cannot be an extension of a // previous, long string variable. OBSwiseTypelList.add(recordType2FixedPart1[1]); variableCounter++; isNumericVariable = true; variableTypelList.add(recordType2FixedPart1[1]); } else if (recordType2FixedPart1[1] > 0) { // This looks like a regular string variable. However, // it may still be a part of a compound variable // (a String > 255 bytes that was split into 255 byte // chunks, stored as individual String variables). 
if (recordType2FixedPart1[1] == 255) { obs255 = true; } if (lastVariableIsExtendable) { String varNameBase = null; if (lastVariableName.length() > 5) { varNameBase = lastVariableName.substring(0, 5); } else { varNameBase = lastVariableName; } if (extendedVariableMode) { if (variableNameIsAnIncrement(varNameBase, lastExtendedVariable, variableName)) { OBSwiseTypelList.add(-1); lastExtendedVariable = variableName; // OK, we stay in the "extended variable" mode; // but we can't move on to the next OBS (hence the commented out // "continue" below: //continue; // see the next comment below for the explanation. // // Should we also set "extendable" flag to false at this point // if it's shorter than 255 bytes, i.e. the last extended chunk? } else { extendedVariableMode = false; } } else { if (variableNameIsAnIncrement(varNameBase, variableName)) { OBSwiseTypelList.add(-1); extendedVariableMode = true; dbgLog.fine("RT2: in extended variable mode; variable " + variableName); lastExtendedVariable = variableName; // Before we move on to the next OBS unit, we need to check // if this current extended variable has its own label specified; // If so, we need to determine its length, then read and skip // that many bytes. // Hence the commented out "continue" below: //continue; } } } if (!extendedVariableMode) { // OK, this is a "real" // string variable, and not a continuation chunk of a compound // string. OBSwiseTypelList.add(recordType2FixedPart1[1]); variableCounter++; if (recordType2FixedPart1[1] == 255) { // This variable is 255 bytes long, i.e. this is // either the single "atomic" variable of the // max allowed size, or it's a 255 byte segment // of a compound variable. So we will check // the next variable and see if it is the continuation // of this one. 
lastVariableIsExtendable = true; } else { lastVariableIsExtendable = false; } if (recordType2FixedPart1[1] % LENGTH_SAV_OBS_BLOCK == 0) { HowManyRt2Units = recordType2FixedPart1[1] / LENGTH_SAV_OBS_BLOCK; } else { HowManyRt2Units = recordType2FixedPart1[1] / LENGTH_SAV_OBS_BLOCK + 1; } variableTypelList.add(recordType2FixedPart1[1]); } } if (!extendedVariableMode) { // Again, we only want to do the following steps for the "real" // variables, not the chunks of split mega-variables: dbgLog.fine("RT2: HowManyRt2Units for this variable=" + HowManyRt2Units); lastVariableName = variableName; // caseWeightVariableOBSIndex starts from 1: 0 is used for does-not-exist cases if (j == (caseWeightVariableOBSIndex - 1)) { caseWeightVariableName = variableName; caseWeightVariableIndex = variableCounter; smd.setCaseWeightVariableName(caseWeightVariableName); smd.getFileInformation().put("caseWeightVariableIndex", caseWeightVariableIndex); } OBSIndexToVariableName.put(j, variableName); //dbgLog.fine("\nvariable name="+variableName+"<-"); dbgLog.fine("RT2: " + j + "-th variable name=" + variableName + "<-"); dbgLog.fine("RT2: raw variable: " + RawVariableName); variableNameList.add(variableName); } // 3rd ([2]) element: = 1 variable-label block follows; 0 = no label // dbgLog.fine("RT: variable label follows?(1:yes; 0: no)=" + recordType2FixedPart1[2]); boolean hasVariableLabel = recordType2FixedPart1[2] == 1 ? 
true : false; if ((recordType2FixedPart1[2] != 0) && (recordType2FixedPart1[2] != 1)) { throw new IOException("RT2: reading error: value is neither 0 or 1" + recordType2FixedPart1[2]); } // 2.4 [optional]The length of a variable label followed: 4-byte int // 3rd element of 2.1 indicates whether this field exists // *** warning: The label block is padded to a multiple of the 4-byte // NOT the raw integer value of this 4-byte block if (hasVariableLabel) { byte[] length_variable_label = new byte[4]; int nbytes_2_4 = stream.read(length_variable_label); if (nbytes_2_4 == 0) { throw new IOException("RT 2: error reading recordType2.4: no bytes read!"); } else { dbgLog.fine("nbytes_2_4=" + nbytes_2_4); } ByteBuffer bb_length_variable_label = ByteBuffer.wrap(length_variable_label, 0, LENGTH_VARIABLE_LABEL); if (isLittleEndian) { bb_length_variable_label.order(ByteOrder.LITTLE_ENDIAN); } int rawVariableLabelLength = bb_length_variable_label.getInt(); dbgLog.fine("rawVariableLabelLength=" + rawVariableLabelLength); int variableLabelLength = getSAVintAdjustedBlockLength(rawVariableLabelLength); dbgLog.fine("RT2: variableLabelLength=" + variableLabelLength); // 2.5 [optional]variable label whose length is found at 2.4 String variableLabel = ""; if (rawVariableLabelLength > 0) { byte[] variable_label = new byte[variableLabelLength]; int nbytes_2_5 = stream.read(variable_label); if (nbytes_2_5 == 0) { throw new IOException("RT 2: error reading recordType2.5: " + variableLabelLength + " bytes requested, no bytes read!"); } else { dbgLog.fine("nbytes_2_5=" + nbytes_2_5); } variableLabel = new String(Arrays.copyOfRange(variable_label, 0, rawVariableLabelLength), defaultCharSet); dbgLog.fine("RT2: variableLabel=" + variableLabel + "<-"); dbgLog.info(variableName + " => " + variableLabel); } else { dbgLog.fine("RT2: defaulting to empty variable label."); } if (!extendedVariableMode) { // We only have any use for this label if it's a "real" variable. 
// Thinking about it, it doesn't make much sense for the "fake" // variables that are actually chunks of large strings to store // their own labels. But in some files they do. Then failing to read // the bytes would result in getting out of sync with the RT record // borders. So we always read the bytes, but only use them for // the real variable entries. /*String variableLabel = new String(Arrays.copyOfRange(variable_label, 0, rawVariableLabelLength),"US-ASCII");*/ variableLabelMap.put(variableName, variableLabel); } } if (extendedVariableMode) { // there's nothing else left for us to do in this iteration of the loop. // Once again, this was not a real variable, but a dummy variable entry // created for a chunk of a string variable longer than 255 bytes -- // that's how SPSS stores them. continue; } // 4th ([3]) element: Missing value type code // 0[none], 1, 2, 3 [point-type],-2[range], -3 [range type+ point] dbgLog.fine("RT: missing value unit follows?(if 0, none)=" + recordType2FixedPart1[3]); boolean hasMissingValues = (validMissingValueCodeSet.contains(recordType2FixedPart1[3]) && (recordType2FixedPart1[3] != 0)) ? true : false; InvalidData invalidDataInfo = null; if (recordType2FixedPart1[3] != 0) { invalidDataInfo = new InvalidData(recordType2FixedPart1[3]); dbgLog.fine("RT: missing value type=" + invalidDataInfo.getType()); } // 2.2: print/write formats: 4-byte each = 8 bytes byte[] printFormt = Arrays.copyOfRange(recordType2Fixed, offset, offset + LENGTH_PRINT_FORMAT_CODE); dbgLog.fine("printFrmt=" + new String(Hex.encodeHex(printFormt))); offset += LENGTH_PRINT_FORMAT_CODE; int formatCode = isLittleEndian ? printFormt[2] : printFormt[1]; int formatWidth = isLittleEndian ? printFormt[1] : printFormt[2]; int formatDecimalPointPosition = isLittleEndian ? 
printFormt[0] : printFormt[3]; dbgLog.fine("RT2: format code{5=F, 1=A[String]}=" + formatCode); formatDecimalPointPositionList.add(formatDecimalPointPosition); if (!SPSSConstants.FORMAT_CODE_TABLE_SAV.containsKey(formatCode)) { throw new IOException("Unknown format code was found = " + formatCode); } else { printFormatList.add(formatCode); } byte[] writeFormt = Arrays.copyOfRange(recordType2Fixed, offset, offset + LENGTH_WRITE_FORMAT_CODE); dbgLog.fine("RT2: writeFrmt=" + new String(Hex.encodeHex(writeFormt))); if (writeFormt[3] != 0x00) { dbgLog.fine("byte-order(write format): reversal required"); } offset += LENGTH_WRITE_FORMAT_CODE; if (!SPSSConstants.ORDINARY_FORMAT_CODE_SET.contains(formatCode)) { StringBuilder sb = new StringBuilder( SPSSConstants.FORMAT_CODE_TABLE_SAV.get(formatCode) + formatWidth); if (formatDecimalPointPosition > 0) { sb.append("." + formatDecimalPointPosition); } dbgLog.info("formattable[i] = " + variableName + " -> " + sb.toString()); printFormatNameTable.put(variableName, sb.toString()); } printFormatTable.put(variableName, SPSSConstants.FORMAT_CODE_TABLE_SAV.get(formatCode)); // 2.6 [optional] missing values:4-byte each if exists // 4th element of 2.1 indicates the structure of this sub-field // Should we perhaps check for this for the "fake" variables too? // if (hasMissingValues) { dbgLog.fine("RT2: decoding missing value: type=" + recordType2FixedPart1[3]); int howManyMissingValueUnits = missingValueCodeUnits.get(recordType2FixedPart1[3]); //int howManyMissingValueUnits = recordType2FixedPart1[3] > 0 ? 
recordType2FixedPart1[3] : 0; dbgLog.fine("RT2: howManyMissingValueUnits=" + howManyMissingValueUnits); byte[] missing_value_code_units = new byte[LENGTH_SAV_OBS_BLOCK * howManyMissingValueUnits]; int nbytes_2_6 = stream.read(missing_value_code_units); if (nbytes_2_6 == 0) { throw new IOException("RT 2: reading recordType2.6: no byte was read"); } else { dbgLog.fine("nbytes_2_6=" + nbytes_2_6); } //printHexDump(missing_value_code_units, "missing value"); if (isNumericVariable) { double[] missingValues = new double[howManyMissingValueUnits]; //List<String> mvp = new ArrayList<String>(); List<String> mv = new ArrayList<String>(); ByteBuffer[] bb_missig_value_code = new ByteBuffer[howManyMissingValueUnits]; int offset_start = 0; for (int i = 0; i < howManyMissingValueUnits; i++) { bb_missig_value_code[i] = ByteBuffer.wrap(missing_value_code_units, offset_start, LENGTH_SAV_OBS_BLOCK); offset_start += LENGTH_SAV_OBS_BLOCK; if (isLittleEndian) { bb_missig_value_code[i].order(ByteOrder.LITTLE_ENDIAN); } ByteBuffer temp = bb_missig_value_code[i].duplicate(); missingValues[i] = bb_missig_value_code[i].getDouble(); if (Double.toHexString(missingValues[i]).equals("-0x1.ffffffffffffep1023")) { dbgLog.fine("1st value is LOWEST"); mv.add(Double.toHexString(missingValues[i])); } else if (Double.valueOf(missingValues[i]).equals(Double.MAX_VALUE)) { dbgLog.fine("2nd value is HIGHEST"); mv.add(Double.toHexString(missingValues[i])); } else { mv.add(doubleNumberFormatter.format(missingValues[i])); } dbgLog.fine(i + "-th missing value=" + Double.toHexString(missingValues[i])); } dbgLog.fine("variableName=" + variableName); if (recordType2FixedPart1[3] > 0) { // point cases only dbgLog.fine("mv(>0)=" + mv); missingValueTable.put(variableName, mv); invalidDataInfo.setInvalidValues(mv); } else if (recordType2FixedPart1[3] == -2) { dbgLog.fine("mv(-2)=" + mv); // range invalidDataInfo.setInvalidRange(mv); } else if (recordType2FixedPart1[3] == -3) { // mixed case dbgLog.fine("mv(-3)=" + 
mv); invalidDataInfo.setInvalidRange(mv.subList(0, 2)); invalidDataInfo.setInvalidValues(mv.subList(2, 3)); missingValueTable.put(variableName, mv.subList(2, 3)); } dbgLog.fine("missing value=" + StringUtils.join(missingValueTable.get(variableName), "|")); dbgLog.fine("invalidDataInfo(Numeric):\n" + invalidDataInfo); invalidDataTable.put(variableName, invalidDataInfo); } else { // string variable case String[] missingValues = new String[howManyMissingValueUnits]; List<String> mv = new ArrayList<String>(); int offset_start = 0; int offset_end = LENGTH_SAV_OBS_BLOCK; for (int i = 0; i < howManyMissingValueUnits; i++) { missingValues[i] = StringUtils.stripEnd(new String( Arrays.copyOfRange(missing_value_code_units, offset_start, offset_end), defaultCharSet), " "); dbgLog.fine("missing value=" + missingValues[i] + "<-"); offset_start = offset_end; offset_end += LENGTH_SAV_OBS_BLOCK; mv.add(missingValues[i]); } invalidDataInfo.setInvalidValues(mv); missingValueTable.put(variableName, mv); invalidDataTable.put(variableName, invalidDataInfo); dbgLog.fine( "missing value(str)=" + StringUtils.join(missingValueTable.get(variableName), "|")); dbgLog.fine("invalidDataInfo(String):\n" + invalidDataInfo); } // string case dbgLog.fine("invalidDataTable:\n" + invalidDataTable); } // if msv } catch (IOException ex) { //ex.printStackTrace(); throw ex; } catch (Exception ex) { ex.printStackTrace(); // should we be throwing some exception here? 
} } // j-loop if (j == OBSUnitsPerCase) { dbgLog.fine("RT2 metadata-related exit-chores"); smd.getFileInformation().put("varQnty", variableCounter); varQnty = variableCounter; dbgLog.fine("RT2: varQnty=" + varQnty); smd.setVariableName(variableNameList.toArray(new String[variableNameList.size()])); smd.setVariableLabel(variableLabelMap); smd.setMissingValueTable(missingValueTable); smd.getFileInformation().put("caseWeightVariableName", caseWeightVariableName); dbgLog.info("sumstat:long case=" + Arrays.deepToString(variableTypelList.toArray())); smd.setVariableFormat(printFormatList); smd.setVariableFormatName(printFormatNameTable); dbgLog.info("<<<<<<"); dbgLog.info("printFormatList = " + printFormatList); dbgLog.info("printFormatNameTable = " + printFormatNameTable); // dbgLog.info("formatCategoryTable = " + formatCategoryTable); dbgLog.info(">>>>>>"); dbgLog.fine("RT2: OBSwiseTypelList=" + OBSwiseTypelList); // variableType is determined after the valueTable is finalized } else { dbgLog.info("RT2: attention! didn't reach the end of the OBS list!"); throw new IOException("RT2: didn't reach the end of the OBS list!"); } dbgLog.fine("***** decodeRecordType2(): end *****"); }
From source file:edu.harvard.iq.dvn.core.web.subsetting.AnalysisPage.java
public String dwnldAction() { dbgLog.fine("***** within dwnldAction() *****"); resetMsgDwnldButton();//from w w w. j a v a 2s . co m if (checkDwnldParameters()) { FacesContext cntxt = FacesContext.getCurrentInstance(); HttpServletResponse res = (HttpServletResponse) cntxt.getExternalContext().getResponse(); HttpServletRequest req = (HttpServletRequest) cntxt.getExternalContext().getRequest(); StudyFile sf = dataTable.getStudyFile(); Long noRecords = dataTable.getRecordsPerCase(); String dsbUrl = getDsbUrl(); dbgLog.fine("dsbUrl=" + dsbUrl); String serverPrefix = req.getScheme() + "://" + req.getServerName() + ":" + req.getServerPort() + req.getContextPath(); dbgLog.fine("serverPrefix" + serverPrefix); Map<String, List<String>> mpl = new HashMap<String, List<String>>(); // File inFile = new File(sf.getFileSystemLocation()); // File origFile = new File(inFile.getParent(), "_" + sf.getFileSystemName()); String formatType = (String) dwnldFileTypeSet.getValue(); dbgLog.fine("file type from the binding=" + formatType); if (formatType == null) { formatType = dwnldFileTypeSelected; dbgLog.fine("file type from the value=" + dwnldFileTypeSelected); } mpl.put("dtdwnld", Arrays.asList(formatType)); dbgLog.fine("citation info to be sent:\n" + getCitation()); mpl.put("studytitle", Arrays.asList(getStudyTitle())); dbgLog.fine("studyId=" + getStudyId().toString()); mpl.put("studyno", Arrays.asList(getStudyId().toString())); mpl.put("studyURL", Arrays.asList(studyURL)); mpl.put("", Arrays.asList("")); mpl.put("browserType", Arrays.asList(browserType)); mpl.put("recodedVarIdSet", getRecodedVarIdSet()); mpl.put("recodedVarNameSet", getRecodedVarNameSet()); mpl.put("recodedVarLabelSet", getRecodedVarLabelSet()); mpl.put("recodedVarTypeSet", getRecodedVariableType()); mpl.put("recodedVarBaseTypeSet", getBaseVariableTypeForRecodedVariable()); mpl.put("baseVarIdSet", getBaseVarIdSetFromRecodedVarIdSet()); mpl.put("baseVarNameSet", getBaseVarNameSetFromRecodedVarIdSet()); 
mpl.put("requestType", Arrays.asList("Download")); // Added by Matt Owen to find a way to sneak in Metadata that is lost between Rdata -> Tab mpl.put("originalFile", Arrays.asList(getOriginalFileSystemLocation().getAbsolutePath())); // ----------------------------------------------------- // Processing route, step by step: // // Step 0. Locate the data file and its attributes String fileId = sf.getId().toString(); String fileloc = sf.getFileSystemLocation(); String tabflnm = sf.getFileName(); boolean sbstOK = sf.isSubsettable(); String flct = sf.getFileType(); // Output debug statements dbgLog.info("location=" + fileloc); dbgLog.info("filename=" + tabflnm); dbgLog.info("subsettable=" + sbstOK); dbgLog.info("filetype=" + flct); dbgLog.info("studyUrl = " + studyURL); dbgLog.info("original file asbolute path = " + getOriginalFileSystemLocation().getAbsolutePath()); // D DvnRJobRequest sro = null; List<File> zipFileList = new ArrayList(); File tmpsbfl = null; if (sbstOK) { try { // this temp file will store the requested column(s): tmpsbfl = File.createTempFile("tempsubsetfile.", ".tab"); deleteTempFileList.add(tmpsbfl); // to produce this file, we'll either open the stream // and run our local subsetting code on it, or request // the subsetting to be performed natively by the access // driver, if it supports the functionality: // check whether a source file is tab-delimited or not boolean fieldcut = true; if ((noRecords != null) && (noRecords >= 1)) { fieldcut = false; } DataAccessRequest daReq = new DataAccessRequest(); daReq.setParameter("noVarHeader", "1"); DataAccessObject accessObject = DataAccess.createDataAccessObject(sf, daReq); if (accessObject.isSubsetSupported()) { dbgLog.fine("Using NATIVE subset functionality of the repository."); daReq.setParameter("vars", getVariableNamesForSubset()); accessObject.open(); InputStream inSubset = accessObject.getInputStream(); OutputStream outSubset = new BufferedOutputStream( new 
FileOutputStream(tmpsbfl.getAbsolutePath())); int bufsize = 8192; byte[] subsetDataBuffer = new byte[bufsize]; while ((bufsize = inSubset.read(subsetDataBuffer)) != -1) { outSubset.write(subsetDataBuffer, 0, bufsize); } inSubset.close(); outSubset.close(); // TODO: catch exceptions; reset the state of the page // if anything went wrong. See the fixed-field section // below for an example. } else { accessObject.open(); if (fieldcut) { // Cutting requested fields of data from a TAB-delimited stream: Set<Integer> fields = getFieldNumbersForSubsetting(); dbgLog.fine("subsetting fields=" + fields); // Create an instance of DvnJavaFieldCutter FieldCutter fc = new DvnJavaFieldCutter(); // Executes the subsetting request fc.subsetFile(accessObject.getInputStream(), tmpsbfl.getAbsolutePath(), fields, dataTable.getCaseQuantity(), "\t"); // TODO: catch exceptions; reset the state of the page // if anything went wrong. See the fixed-field section // below for an example. } else { // Cutting requested columns of data from a fixed-field stream: Map<Long, List<List<Integer>>> varMetaSet = getSubsettingMetaData(noRecords); DvnNewJavaFieldCutter fc = new DvnNewJavaFieldCutter(varMetaSet); try { //fc.cutColumns(new File(cutOp1), noRecords.intValue(), 0, "\t", tmpsbfl.getAbsolutePath()); fc.cutColumns(accessObject.getInputStream(), noRecords.intValue(), 0, "\t", tmpsbfl.getAbsolutePath()); } catch (FileNotFoundException e) { e.printStackTrace(); msgDwnldButton.setValue("* could not generate subset due to an IO problem"); msgDwnldButton.setVisible(true); dbgLog.warning("exiting dwnldAction() due to an IO problem "); getVDCRequestBean().setSelectedTab("tabDwnld"); dvnDSBTimerService.createTimer(deleteTempFileList, TEMP_FILE_LIFETIME); return ""; } catch (RuntimeException re) { re.printStackTrace(); msgDwnldButton.setValue("* could not generate subset due to an runtime error"); msgDwnldButton.setVisible(true); dbgLog.warning("exiting dwnldAction() due to an runtime error"); 
getVDCRequestBean().setSelectedTab("tabDwnld"); dvnDSBTimerService.createTimer(deleteTempFileList, TEMP_FILE_LIFETIME); return ""; } } } // Checks the resulting subset file: if (tmpsbfl.exists()) { Long subsetFileSize = tmpsbfl.length(); dbgLog.fine("subset file:Length=" + subsetFileSize); dbgLog.fine("subset file:name=" + tmpsbfl.getAbsolutePath()); if (subsetFileSize > 0) { mpl.put("subsetFileName", Arrays.asList(tmpsbfl.getAbsolutePath())); mpl.put("subsetDataFileName", Arrays.asList(tmpsbfl.getName())); } else { // subset file exists but it is empty msgDwnldButton.setValue("* an subset file is empty"); msgDwnldButton.setVisible(true); dbgLog.warning( "exiting dwnldAction() due to a subsetting error:" + "a subset file is empty"); getVDCRequestBean().setSelectedTab("tabDwnld"); dvnDSBTimerService.createTimer(deleteTempFileList, TEMP_FILE_LIFETIME); return ""; } } else { // subset file was not created msgDwnldButton.setValue("* a subset file was not created"); msgDwnldButton.setVisible(true); dbgLog.warning("exiting dwnldAction() due to a subsetting error:" + "a subset file was not created"); getVDCRequestBean().setSelectedTab("tabDwnld"); dvnDSBTimerService.createTimer(deleteTempFileList, TEMP_FILE_LIFETIME); return ""; } // If we've made it this far, we can increment the number of // downloads for the study file: VDC vdc = vdcService.getVDCFromRequest(req); GuestBookResponse guestbookResponse = (GuestBookResponse) getVDCSessionBean() .getGuestbookResponseMap().get("guestBookResponse_" + sf.getStudy().getId()); if (guestbookResponse == null) { //need to set up dummy network response guestbookResponse = guestBookResponseServiceBean.initNetworkGuestBookResponse(sf.getStudy(), sf, getVDCSessionBean().getLoginBean()); } guestbookResponse.setStudyVersion(sf.getStudy().getStudyVersionByNumber(versionNumber)); String jsessionId = null; Cookie cookies[] = req.getCookies(); for (int i = 0; i < cookies.length; i++) { if ("JSESSIONID".equals(cookies[i].getName())) { 
jsessionId = cookies[i].getValue(); } } if (jsessionId == null || "".equals(jsessionId)) { String[] stringArray = getVDCSessionBean().toString().split("@"); jsessionId = stringArray[1]; } guestbookResponse.setSessionId(jsessionId); String friendlyFormatName = ""; String formatRequestedMimeType = ""; if (formatType != null && !"".equals(formatType)) { if (formatType.equals("D00")) { formatRequestedMimeType = "text/tab-separated-values"; // tabular } else if (formatType.equals("D01")) { formatRequestedMimeType = "text/tab-separated-values"; // fixed-field } else { for (DataFileFormatType type : studyService.getDataFileFormatTypes()) { if (formatType.equals(type.getValue())) { formatRequestedMimeType = type.getMimeType(); } } } } if (formatRequestedMimeType == null || "".equals(formatRequestedMimeType)) { formatRequestedMimeType = "application/x-unknown"; } friendlyFormatName = FileUtil.getUserFriendlyTypeForMime(formatRequestedMimeType); guestbookResponse.setDownloadtype("Subsetting - " + friendlyFormatName); if (vdc != null) { studyService.incrementNumberOfDownloads(sf.getId(), vdc.getId(), (GuestBookResponse) guestbookResponse); } else { studyService.incrementNumberOfDownloads(sf.getId(), (Long) null, (GuestBookResponse) guestbookResponse); } // Step 3. Organizes parameters/metadata to be sent to the implemented // data-analysis-service class // skip the Rserve call completely (for plain tab file format, with no recoding)! -- L.A. if (!formatType.equals("D01") || (recodeSchema.size() > 0)) { Map<String, Map<String, String>> vls = getValueTablesForAllRequestedVariables(); // New (as of 3.6): support for ordered categorical variables // (ingested from R ordered factors). // Note that this is only being added here, i.e., to the // download-and-save part; if/when we make the analysis // and statistics utilize/handle these ordered categories // in some special way, we'll need to add the actual // ordered values to the SRO objects there as well. -- L.A. 
Map<String, List<String>> categoryOrders = getCategoryValueOrdersForAllRequestedVariables(); if (categoryOrders != null) { sro = new DvnRJobRequest(getDataVariableForRequest(), mpl, vls, recodeSchema, categoryOrders, null); } else { sro = new DvnRJobRequest(getDataVariableForRequest(), mpl, vls, recodeSchema); } /* * Add the recoded -> base variable name map; (new as of v3.6;) * TODO: (?) do the same for the other action requests. * -- L.A. */ sro.setRecodedToBaseVar(getRecodedVarToBaseVarName()); // dbgLog.fine("sro dump:\n"+ToStringBuilder.reflectionToString(sro, ToStringStyle.MULTI_LINE_STYLE)); // Step 4. Creates an instance of the the implemented // data-analysis-service class DvnRDataAnalysisServiceImpl das = new DvnRDataAnalysisServiceImpl(); // Executes a download or data analysis request and // stores the results in a Map <String, String> resultInfo = das.execute(sro); // Step 5. Check the exit status of the R process: if (resultInfo.get("RexecError").equals("true")) { msgDwnldButton.setValue("* The Request failed due to an R-runtime error"); msgDwnldButton.setVisible(true); dbgLog.fine("exiting dwnldAction() due to an R-runtime error"); getVDCRequestBean().setSelectedTab("tabDwnld"); dvnDSBTimerService.createTimer(deleteTempFileList, TEMP_FILE_LIFETIME); return ""; } } if (recodeSchema.size() > 0) { resultInfo.put("subsettingCriteria", sro.getSubsetConditionsForCitation()); } else { resultInfo.put("subsettingCriteria", "variables: " + getVariableNamesForSubset()); } } catch (MalformedURLException e) { e.printStackTrace(); msgDwnldButton.setValue("* file URL is malformed"); msgDwnldButton.setVisible(true); dbgLog.warning("exiting dwnldAction() due to a URL problem "); getVDCRequestBean().setSelectedTab("tabDwnld"); return ""; } catch (IOException e) { // this may occur if the dataverse is not released, // or if the file exists, but it is not accessible, etc. 
e.printStackTrace(); msgDwnldButton.setValue("* an IO problem occurred"); msgDwnldButton.setVisible(true); dbgLog.warning("exiting dwnldAction() due to an IO problem "); getVDCRequestBean().setSelectedTab("tabDwnld"); return ""; } // end of subset-OK case } else { // not subsettable data file msgDwnldButton.setValue("* this data file is not subsettable file"); msgDwnldButton.setVisible(true); dbgLog.warning("exiting dwnldAction(): the data file is not subsettable "); getVDCRequestBean().setSelectedTab("tabDwnld"); dvnDSBTimerService.createTimer(deleteTempFileList, TEMP_FILE_LIFETIME); return ""; } // end:subsetNotOKcase // final processing steps for all successful cases // add study-metadata to the resultInfo map if (formatType.equals("D01") && !(recodeSchema.size() > 0)) { resultInfo.put("wbDataFileName", tmpsbfl.getAbsolutePath()); // Fields that would normally be populated by R: resultInfo.put("PID", "N/A"); resultInfo.put("R_min_verion_no", "N/A"); resultInfo.put("dsbHost", "N/A"); Date now = new Date(); resultInfo.put("RexecDate", now.toString()); } else { resultInfo.put("R_min_verion_no", resultInfo.get("Rversion").substring(2)); } resultInfo.put("offlineCitation", getCitation()); resultInfo.put("studyTitle", getStudyTitle()); resultInfo.put("studyNo", getStudyId().toString()); resultInfo.put("dtId", dtId.toString()); if (versionNumber != null) { resultInfo.put("versionNumber", versionNumber.toString()); } resultInfo.put("studyURL", studyURL); resultInfo.put("dataverse_version_no", dvnVersionNumber); resultInfo.put("option", "subset"); resultInfo.put("variableList", getVariableNamesForSubset()); // calculate UNF (locally, on the application side): List<DataVariable> subsetVariableList = getDataVariableForRequest(); String subsetUNFvalue = "[NOT CALCULATED]"; String[] unfValues = new String[subsetVariableList.size()]; for (int i = 0; i < subsetVariableList.size(); i++) { unfValues[i] = subsetVariableList.get(i).getUnf(); } dbgLog.fine("unf set:\n" + 
Arrays.deepToString(unfValues)); try { subsetUNFvalue = UNF5Util.calculateUNF(unfValues); } catch (NumberFormatException ex) { // if anything went wrong during the UNF calculation, it's not // a fatal condition; we'll just be displaying "not calculated" // instead of the UNF in the final README file. dbgLog.fine("error while trying to calculate subset UNF: Number Format Exception."); ex.printStackTrace(); } catch (IOException ex) { dbgLog.fine("error while trying to calculate subset UNF: IO Exception."); ex.printStackTrace(); } resultInfo.put("fileUNF", subsetUNFvalue); // writing necessary files: try { if (formatType.equals("D01") && !(recodeSchema.size() > 0)) { // (2) tab-delimited-format-only step: // // In the final zip file we package the subset file // and a replication README file (also contains citation). // We also *used to* include the SAS, SPSS and R control // files created by R. We are not doing this anymore, but // I left the code commented-out below. // -- L.A. Jan. 2012 // // We are also adding the variable header to the file here. /* SKIP CODE FILES -- L.A. 
String codeFileSas = "codeFile_sas_" + resultInfo.get("PID") + ".sas"; File tmpCCsasfl = new File(TEMP_DIR, codeFileSas); deleteTempFileList.add(tmpCCsasfl); zipFileList.add(tmpCCsasfl); String codeFileSpss = "codeFile_spss_" + resultInfo.get("PID") + ".sps"; File tmpCCspsfl = new File(TEMP_DIR, codeFileSpss); deleteTempFileList.add(tmpCCspsfl); zipFileList.add(tmpCCspsfl); String codeFileStata = "codeFile_stata_" + resultInfo.get("PID") + ".do"; File tmpCCdofl = new File(TEMP_DIR, codeFileStata); deleteTempFileList.add(tmpCCdofl); zipFileList.add(tmpCCdofl); StatisticalCodeFileWriter scfw = new StatisticalCodeFileWriter(sro); scfw.write(tmpCCsasfl, tmpCCspsfl, tmpCCdofl); */ // add the subset file: File tmpsbflnew = File.createTempFile("tempsubsetfile_new.", ".tab"); deleteTempFileList.add(tmpsbflnew); InputStream inb = new BufferedInputStream(new FileInputStream(tmpsbfl)); OutputStream outb = new BufferedOutputStream(new FileOutputStream(tmpsbflnew)); String varHeaderLine = getVariableHeaderForSubset(); // Add the variable header to the subset file: byte[] varHeaderBuffer = null; varHeaderBuffer = varHeaderLine.getBytes(); outb.write(varHeaderBuffer); outb.flush(); int bufsize; byte[] bffr = new byte[8192]; while ((bufsize = inb.read(bffr)) != -1) { outb.write(bffr, 0, bufsize); } inb.close(); outb.close(); dbgLog.fine("adding tab file: " + tmpsbflnew.getName()); zipFileList.add(tmpsbflnew); } else { // (2)The format-converted subset data file String wbDataFileName = resultInfo.get("wbDataFileName"); dbgLog.fine("wbDataFileName=" + wbDataFileName); File wbSubsetDataFile = new File(wbDataFileName); if (wbSubsetDataFile.exists()) { dbgLog.fine("wbSubsetDataFile:length=" + wbSubsetDataFile.length()); deleteTempFileList.add(wbSubsetDataFile); zipFileList.add(wbSubsetDataFile); } else { // the data file was not created dbgLog.fine("wbSubsetDataFile does not exist"); msgDwnldButton.setValue("* The requested data file is not available"); msgDwnldButton.setVisible(true); 
dbgLog.warning("exiting dwnldAction(): data file was not transferred"); getVDCRequestBean().setSelectedTab("tabDwnld"); dvnDSBTimerService.createTimer(deleteTempFileList, TEMP_FILE_LIFETIME); return ""; } } // Create README file: String readMeFileName = null; if (resultInfo.get("PID") != null && !resultInfo.get("PID").equals("N/A")) { readMeFileName = REP_README_FILE_PREFIX + resultInfo.get("PID") + ".txt"; } else { readMeFileName = REP_README_FILE_PREFIX + fileId + ".txt"; } File readMeFile = new File(TEMP_DIR, readMeFileName); DvnReplicationREADMEFileWriter rw = new DvnReplicationREADMEFileWriter(resultInfo); rw.writeREADMEfile(readMeFile, true); zipFileList.add(readMeFile); deleteTempFileList.add(readMeFile); for (File f : zipFileList) { dbgLog.fine("file to zip: path=" + f.getAbsolutePath() + "\tname=" + f.getName()); } // We can now zip all the required files" try { String zipFilePrefix = null; if (resultInfo.get("PID") != null && !resultInfo.get("PID").equals("N/A")) { zipFilePrefix = "zipFile_" + resultInfo.get("PID") + ".zip"; } else { zipFilePrefix = "zipFile_" + fileId + ".zip"; } File zipFile = new File(TEMP_DIR, zipFilePrefix); //deleteTempFileList.add(zipFile); String zfname = zipFile.getName(); zipFileName = zfname; zipFiles(new FileOutputStream(zipFile), zipFileList); /* try { Thread.sleep(1000); } catch (Exception e) { } */ zipResourceDynFileName = new ByteArrayResource( toByteArray(new FileInputStream(zipFile.getAbsolutePath()))); dbgLog.info("Subsetting: zipFileName=" + zipFileName); dbgLog.info("Subsetting: zipFile, absolute path: " + zipFile.getAbsolutePath()); dvnDSBTimerService.createTimer(deleteTempFileList, TEMP_FILE_LIFETIME); // Hide 'Create' button, show 'Download' button: dwnldButton.setRendered(false); dwnloadSubsetButton.setRendered(true); dbgLog.info("***** within dwnldAction(): ends here *****"); /* * Navigation: * - is it necessary to use "faces-redirect" navigation here? 
* Or should we simply return "" as long as we want to stay * on the subsetting page? if (versionNumber != null) { return "/study/SubsettingPage?faces-redirect=true&dtId=" + dtId + "&versionNumber=" + versionNumber; } return "/study/SubsettingPage?faces-redirect=true&dtId=" + dtId; */ return ""; } catch (IOException e) { // file-access problem, etc. e.printStackTrace(); dbgLog.fine("download zipping IO exception"); msgDwnldButton.setValue("* an IO problem occurred"); msgDwnldButton.setVisible(true); dbgLog.warning("exiting dwnldAction() due to an IO problem "); getVDCRequestBean().setSelectedTab("tabDwnld"); dvnDSBTimerService.createTimer(deleteTempFileList, TEMP_FILE_LIFETIME); return ""; } // end of zipping step } catch (IOException e) { e.printStackTrace(); msgDwnldButton.setValue("* an IO problem occurred"); msgDwnldButton.setVisible(true); dbgLog.warning("exiting dwnldAction() due to an IO problem "); getVDCRequestBean().setSelectedTab("tabDwnld"); dvnDSBTimerService.createTimer(deleteTempFileList, TEMP_FILE_LIFETIME); return ""; } // end: params are OK-case } else { // the selection is incomplete // show error message; pgDwnldErrMsg.setRendered(true); msgDwnldButton.setValue("* Error: Select a file format"); msgDwnldButton.setVisible(true); dbgLog.warning("exiting dwnldAction() due to incomplete data "); getVDCRequestBean().setSelectedTab("tabDwnld"); return ""; } // end: checking params }
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SAVFileReader.java
void decodeRecordType2(BufferedInputStream stream) throws IOException { dbgLog.fine("decodeRecordType2(): start"); if (stream == null) { throw new IllegalArgumentException("stream == null!"); }/*from ww w.ja v a 2 s . c o m*/ Map<String, String> printFormatNameTable = new LinkedHashMap<String, String>(); Map<String, String> variableLabelMap = new LinkedHashMap<String, String>(); Map<String, List<String>> missingValueTable = new LinkedHashMap<String, List<String>>(); List<Integer> printFormatList = new ArrayList<Integer>(); String caseWeightVariableName = null; int caseWeightVariableIndex = 0; boolean lastVariableIsExtendable = false; boolean extendedVariableMode = false; boolean obs255 = false; String lastVariableName = null; String lastExtendedVariable = null; // this field repeats as many as the number of variables in // this sav file // (note that the above statement is not technically correct, this // record repeats not just for every variable in the file, but for // every OBS (8 byte unit); i.e., if a string is split into multiple // OBS units, each one will have its own RT2 record -- L.A.). // Each field constists of a fixed (32-byte) segment and // then a few variable segments: // if the variable has a label (3rd INT4 set to 1), then there's 4 more // bytes specifying the length of the label, and then that many bytes // holding the label itself (no more than 256). // Then if there are optional missing value units (4th INT4 set to 1) // there will be 3 more OBS units attached = 24 extra bytes. 
int variableCounter = 0; int obsSeqNumber = 0; int j; dbgLog.fine("RT2: Reading " + OBSUnitsPerCase + " OBS units."); for (j = 0; j < OBSUnitsPerCase; j++) { dbgLog.fine("RT2: " + j + "-th RT2 unit is being decoded."); // 2.0: read the fixed[=non-optional] 32-byte segment byte[] recordType2Fixed = new byte[LENGTH_RECORDTYPE2_FIXED]; try { int nbytes = stream.read(recordType2Fixed, 0, LENGTH_RECORDTYPE2_FIXED); //printHexDump(recordType2Fixed, "recordType2 part 1"); if (nbytes == 0) { throw new IOException("reading recordType2: no bytes read!"); } int offset = 0; // 2.1: create int-view of the bytebuffer for the first 16-byte segment int rt2_1st_4_units = 4; ByteBuffer[] bb_record_type2_fixed_part1 = new ByteBuffer[rt2_1st_4_units]; int[] recordType2FixedPart1 = new int[rt2_1st_4_units]; for (int i = 0; i < rt2_1st_4_units; i++) { bb_record_type2_fixed_part1[i] = ByteBuffer.wrap(recordType2Fixed, offset, LENGTH_SAV_INT_BLOCK); offset += LENGTH_SAV_INT_BLOCK; if (isLittleEndian) { bb_record_type2_fixed_part1[i].order(ByteOrder.LITTLE_ENDIAN); } recordType2FixedPart1[i] = bb_record_type2_fixed_part1[i].getInt(); } ///dbgLog.fine("recordType2FixedPart="+ /// ReflectionToStringBuilder.toString(recordType2FixedPart1, ToStringStyle.MULTI_LINE_STYLE)); // 1st ([0]) element must be 2 otherwise no longer Record Type 2 if (recordType2FixedPart1[0] != 2) { dbgLog.warning(j + "-th RT header value is no longet RT2! " + recordType2FixedPart1[0]); break; } dbgLog.fine("variable type[must be 2]=" + recordType2FixedPart1[0]); // 2.3 variable name: 8 byte(space[x20]-padded) // This field is located at the very end of the 32 byte // fixed-size RT2 header (bytes 24-31). 
// We are processing it now, so that // we can make the decision on whether this variable is part // of a compound variable: String RawVariableName = getNullStrippedString(new String( Arrays.copyOfRange(recordType2Fixed, 24, (24 + LENGTH_VARIABLE_NAME)), defaultCharSet)); //offset +=LENGTH_VARIABLE_NAME; String variableName = null; if (RawVariableName.indexOf(' ') >= 0) { variableName = RawVariableName.substring(0, RawVariableName.indexOf(' ')); } else { variableName = RawVariableName; } // 2nd ([1]) element: numeric variable = 0 :for string variable // this block indicates its datum-length, i.e, >0 ; // if -1, this RT2 unit is a non-1st RT2 unit for a string variable // whose value is longer than 8 character. boolean isNumericVariable = false; dbgLog.fine("variable type(0: numeric; > 0: String;-1 continue )=" + recordType2FixedPart1[1]); //OBSwiseTypelList.add(recordType2FixedPart1[1]); int HowManyRt2Units = 1; if (recordType2FixedPart1[1] == -1) { dbgLog.fine("this RT2 is an 8 bit continuation chunk of an earlier string variable"); if (obs255) { if (obsSeqNumber < 30) { OBSwiseTypelList.add(recordType2FixedPart1[1]); obsSeqNumber++; } else { OBSwiseTypelList.add(-2); obs255 = false; obsSeqNumber = 0; } } else { OBSwiseTypelList.add(recordType2FixedPart1[1]); } obsNonVariableBlockSet.add(j); continue; } else if (recordType2FixedPart1[1] == 0) { // This is a numeric variable extendedVariableMode = false; // And as such, it cannot be an extension of a // previous, long string variable. OBSwiseTypelList.add(recordType2FixedPart1[1]); variableCounter++; isNumericVariable = true; variableTypelList.add(recordType2FixedPart1[1]); } else if (recordType2FixedPart1[1] > 0) { // This looks like a regular string variable. However, // it may still be a part of a compound variable // (a String > 255 bytes that was split into 255 byte // chunks, stored as individual String variables). 
if (recordType2FixedPart1[1] == 255) { obs255 = true; } if (lastVariableIsExtendable) { String varNameBase = null; if (lastVariableName.length() > 5) { varNameBase = lastVariableName.substring(0, 5); } else { varNameBase = lastVariableName; } if (extendedVariableMode) { if (variableNameIsAnIncrement(varNameBase, lastExtendedVariable, variableName)) { OBSwiseTypelList.add(-1); lastExtendedVariable = variableName; // OK, we stay in the "extended variable" mode; // but we can't move on to the next OBS (hence the commented out // "continue" below: //continue; // see the next comment below for the explanation. // // Should we also set "extendable" flag to false at this point // if it's shorter than 255 bytes, i.e. the last extended chunk? } else { extendedVariableMode = false; } } else { if (variableNameIsAnIncrement(varNameBase, variableName)) { OBSwiseTypelList.add(-1); extendedVariableMode = true; dbgLog.fine("RT2: in extended variable mode; variable " + variableName); lastExtendedVariable = variableName; // Before we move on to the next OBS unit, we need to check // if this current extended variable has its own label specified; // If so, we need to determine its length, then read and skip // that many bytes. // Hence the commented out "continue" below: //continue; } } } if (!extendedVariableMode) { // OK, this is a "real" // string variable, and not a continuation chunk of a compound // string. OBSwiseTypelList.add(recordType2FixedPart1[1]); variableCounter++; if (recordType2FixedPart1[1] == 255) { // This variable is 255 bytes long, i.e. this is // either the single "atomic" variable of the // max allowed size, or it's a 255 byte segment // of a compound variable. So we will check // the next variable and see if it is the continuation // of this one. 
lastVariableIsExtendable = true; } else { lastVariableIsExtendable = false; } if (recordType2FixedPart1[1] % LENGTH_SAV_OBS_BLOCK == 0) { HowManyRt2Units = recordType2FixedPart1[1] / LENGTH_SAV_OBS_BLOCK; } else { HowManyRt2Units = recordType2FixedPart1[1] / LENGTH_SAV_OBS_BLOCK + 1; } variableTypelList.add(recordType2FixedPart1[1]); } } if (!extendedVariableMode) { // Again, we only want to do the following steps for the "real" // variables, not the chunks of split mega-variables: dbgLog.fine("RT2: HowManyRt2Units for this variable=" + HowManyRt2Units); lastVariableName = variableName; // caseWeightVariableOBSIndex starts from 1: 0 is used for does-not-exist cases if (j == (caseWeightVariableOBSIndex - 1)) { caseWeightVariableName = variableName; // TODO: do we need this "index"? -- 4.0 alpha caseWeightVariableIndex = variableCounter; ///smd.setCaseWeightVariableName(caseWeightVariableName); ///smd.getFileInformation().put("caseWeightVariableIndex", caseWeightVariableIndex); } OBSIndexToVariableName.put(j, variableName); //dbgLog.fine("\nvariable name="+variableName+"<-"); dbgLog.fine("RT2: " + j + "-th variable name=" + variableName + "<-"); dbgLog.fine("RT2: raw variable: " + RawVariableName); variableNameList.add(variableName); } // 3rd ([2]) element: = 1 variable-label block follows; 0 = no label // dbgLog.fine("RT: variable label follows?(1:yes; 0: no)=" + recordType2FixedPart1[2]); boolean hasVariableLabel = recordType2FixedPart1[2] == 1 ? 
true : false; if ((recordType2FixedPart1[2] != 0) && (recordType2FixedPart1[2] != 1)) { throw new IOException("RT2: reading error: value is neither 0 or 1" + recordType2FixedPart1[2]); } // 2.4 [optional]The length of a variable label followed: 4-byte int // 3rd element of 2.1 indicates whether this field exists // *** warning: The label block is padded to a multiple of the 4-byte // NOT the raw integer value of this 4-byte block if (hasVariableLabel) { byte[] length_variable_label = new byte[4]; int nbytes_2_4 = stream.read(length_variable_label); if (nbytes_2_4 == 0) { throw new IOException("RT 2: error reading recordType2.4: no bytes read!"); } else { dbgLog.fine("nbytes_2_4=" + nbytes_2_4); } ByteBuffer bb_length_variable_label = ByteBuffer.wrap(length_variable_label, 0, LENGTH_VARIABLE_LABEL); if (isLittleEndian) { bb_length_variable_label.order(ByteOrder.LITTLE_ENDIAN); } int rawVariableLabelLength = bb_length_variable_label.getInt(); dbgLog.fine("rawVariableLabelLength=" + rawVariableLabelLength); int variableLabelLength = getSAVintAdjustedBlockLength(rawVariableLabelLength); dbgLog.fine("RT2: variableLabelLength=" + variableLabelLength); // 2.5 [optional]variable label whose length is found at 2.4 String variableLabel = ""; if (rawVariableLabelLength > 0) { byte[] variable_label = new byte[variableLabelLength]; int nbytes_2_5 = stream.read(variable_label); if (nbytes_2_5 == 0) { throw new IOException("RT 2: error reading recordType2.5: " + variableLabelLength + " bytes requested, no bytes read!"); } else { dbgLog.fine("nbytes_2_5=" + nbytes_2_5); } variableLabel = getNullStrippedString(new String( Arrays.copyOfRange(variable_label, 0, rawVariableLabelLength), defaultCharSet)); dbgLog.fine("RT2: variableLabel=" + variableLabel + "<-"); dbgLog.fine(variableName + " => " + variableLabel); } else { dbgLog.fine("RT2: defaulting to empty variable label."); } if (!extendedVariableMode) { // We only have any use for this label if it's a "real" variable. 
// Thinking about it, it doesn't make much sense for the "fake" // variables that are actually chunks of large strings to store // their own labels. But in some files they do. Then failing to read // the bytes would result in getting out of sync with the RT record // borders. So we always read the bytes, but only use them for // the real variable entries. /*String variableLabel = new String(Arrays.copyOfRange(variable_label, 0, rawVariableLabelLength),"US-ASCII");*/ variableLabelMap.put(variableName, variableLabel); } } if (extendedVariableMode) { // there's nothing else left for us to do in this iteration of the loop. // Once again, this was not a real variable, but a dummy variable entry // created for a chunk of a string variable longer than 255 bytes -- // that's how SPSS stores them. continue; } // 4th ([3]) element: Missing value type code // 0[none], 1, 2, 3 [point-type],-2[range], -3 [range type+ point] dbgLog.fine("RT: missing value unit follows?(if 0, none)=" + recordType2FixedPart1[3]); boolean hasMissingValues = (validMissingValueCodeSet.contains(recordType2FixedPart1[3]) && (recordType2FixedPart1[3] != 0)) ? true : false; InvalidData invalidDataInfo = null; if (recordType2FixedPart1[3] != 0) { invalidDataInfo = new InvalidData(recordType2FixedPart1[3]); dbgLog.fine("RT: missing value type=" + invalidDataInfo.getType()); } // 2.2: print/write formats: 4-byte each = 8 bytes byte[] printFormt = Arrays.copyOfRange(recordType2Fixed, offset, offset + LENGTH_PRINT_FORMAT_CODE); dbgLog.fine("printFrmt=" + new String(Hex.encodeHex(printFormt))); offset += LENGTH_PRINT_FORMAT_CODE; int formatCode = isLittleEndian ? printFormt[2] : printFormt[1]; int formatWidth = isLittleEndian ? printFormt[1] : printFormt[2]; // TODO: // What should we be doing with these "format decimal positions" // in 4.0? // -- L.A. 4.0 alpha int formatDecimalPointPosition = isLittleEndian ? 
printFormt[0] : printFormt[3]; dbgLog.fine("RT2: format code{5=F, 1=A[String]}=" + formatCode); formatDecimalPointPositionList.add(formatDecimalPointPosition); if (!SPSSConstants.FORMAT_CODE_TABLE_SAV.containsKey(formatCode)) { throw new IOException("Unknown format code was found = " + formatCode); } else { printFormatList.add(formatCode); } byte[] writeFormt = Arrays.copyOfRange(recordType2Fixed, offset, offset + LENGTH_WRITE_FORMAT_CODE); dbgLog.fine("RT2: writeFrmt=" + new String(Hex.encodeHex(writeFormt))); if (writeFormt[3] != 0x00) { dbgLog.fine("byte-order(write format): reversal required"); } offset += LENGTH_WRITE_FORMAT_CODE; if (!SPSSConstants.ORDINARY_FORMAT_CODE_SET.contains(formatCode)) { StringBuilder sb = new StringBuilder( SPSSConstants.FORMAT_CODE_TABLE_SAV.get(formatCode) + formatWidth); if (formatDecimalPointPosition > 0) { sb.append("." + formatDecimalPointPosition); } dbgLog.fine("formattable[i] = " + variableName + " -> " + sb.toString()); printFormatNameTable.put(variableName, sb.toString()); } printFormatTable.put(variableName, SPSSConstants.FORMAT_CODE_TABLE_SAV.get(formatCode)); // 2.6 [optional] missing values:4-byte each if exists // 4th element of 2.1 indicates the structure of this sub-field // Should we perhaps check for this for the "fake" variables too? // if (hasMissingValues) { dbgLog.fine("RT2: decoding missing value: type=" + recordType2FixedPart1[3]); int howManyMissingValueUnits = missingValueCodeUnits.get(recordType2FixedPart1[3]); //int howManyMissingValueUnits = recordType2FixedPart1[3] > 0 ? 
recordType2FixedPart1[3] : 0; dbgLog.fine("RT2: howManyMissingValueUnits=" + howManyMissingValueUnits); byte[] missing_value_code_units = new byte[LENGTH_SAV_OBS_BLOCK * howManyMissingValueUnits]; int nbytes_2_6 = stream.read(missing_value_code_units); if (nbytes_2_6 == 0) { throw new IOException("RT 2: reading recordType2.6: no byte was read"); } else { dbgLog.fine("nbytes_2_6=" + nbytes_2_6); } //printHexDump(missing_value_code_units, "missing value"); if (isNumericVariable) { double[] missingValues = new double[howManyMissingValueUnits]; //List<String> mvp = new ArrayList<String>(); List<String> mv = new ArrayList<String>(); ByteBuffer[] bb_missig_value_code = new ByteBuffer[howManyMissingValueUnits]; int offset_start = 0; for (int i = 0; i < howManyMissingValueUnits; i++) { bb_missig_value_code[i] = ByteBuffer.wrap(missing_value_code_units, offset_start, LENGTH_SAV_OBS_BLOCK); offset_start += LENGTH_SAV_OBS_BLOCK; if (isLittleEndian) { bb_missig_value_code[i].order(ByteOrder.LITTLE_ENDIAN); } ByteBuffer temp = bb_missig_value_code[i].duplicate(); missingValues[i] = bb_missig_value_code[i].getDouble(); if (Double.toHexString(missingValues[i]).equals("-0x1.ffffffffffffep1023")) { dbgLog.fine("1st value is LOWEST"); mv.add(Double.toHexString(missingValues[i])); } else if (Double.valueOf(missingValues[i]).equals(Double.MAX_VALUE)) { dbgLog.fine("2nd value is HIGHEST"); mv.add(Double.toHexString(missingValues[i])); } else { mv.add(doubleNumberFormatter.format(missingValues[i])); } dbgLog.fine(i + "-th missing value=" + Double.toHexString(missingValues[i])); } dbgLog.fine("variableName=" + variableName); if (recordType2FixedPart1[3] > 0) { // point cases only dbgLog.fine("mv(>0)=" + mv); missingValueTable.put(variableName, mv); invalidDataInfo.setInvalidValues(mv); } else if (recordType2FixedPart1[3] == -2) { dbgLog.fine("mv(-2)=" + mv); // range invalidDataInfo.setInvalidRange(mv); } else if (recordType2FixedPart1[3] == -3) { // mixed case dbgLog.fine("mv(-3)=" + 
mv); invalidDataInfo.setInvalidRange(mv.subList(0, 2)); invalidDataInfo.setInvalidValues(mv.subList(2, 3)); missingValueTable.put(variableName, mv.subList(2, 3)); } dbgLog.fine("missing value=" + StringUtils.join(missingValueTable.get(variableName), "|")); dbgLog.fine("invalidDataInfo(Numeric):\n" + invalidDataInfo); invalidDataTable.put(variableName, invalidDataInfo); } else { // string variable case String[] missingValues = new String[howManyMissingValueUnits]; List<String> mv = new ArrayList<String>(); int offset_start = 0; int offset_end = LENGTH_SAV_OBS_BLOCK; for (int i = 0; i < howManyMissingValueUnits; i++) { missingValues[i] = StringUtils.stripEnd(new String( Arrays.copyOfRange(missing_value_code_units, offset_start, offset_end), defaultCharSet), " "); dbgLog.fine("missing value=" + missingValues[i] + "<-"); offset_start = offset_end; offset_end += LENGTH_SAV_OBS_BLOCK; mv.add(missingValues[i]); } invalidDataInfo.setInvalidValues(mv); missingValueTable.put(variableName, mv); invalidDataTable.put(variableName, invalidDataInfo); dbgLog.fine( "missing value(str)=" + StringUtils.join(missingValueTable.get(variableName), "|")); dbgLog.fine("invalidDataInfo(String):\n" + invalidDataInfo); } // string case dbgLog.fine("invalidDataTable:\n" + invalidDataTable); } // if msv } catch (IOException ex) { //ex.printStackTrace(); throw ex; } catch (Exception ex) { ex.printStackTrace(); // should we be throwing some exception here? } } // j-loop if (j != OBSUnitsPerCase) { dbgLog.fine("RT2: attention! 
didn't reach the end of the OBS list!"); throw new IOException("RT2: didn't reach the end of the OBS list!"); } dbgLog.fine("RT2 metadata-related exit-chores"); ///smd.getFileInformation().put("varQnty", variableCounter); dataTable.setVarQuantity(new Long(variableCounter)); dbgLog.fine("RT2: varQnty=" + variableCounter); // 4.0 Initialize variables: List<DataVariable> variableList = new ArrayList<DataVariable>(); for (int i = 0; i < variableCounter; i++) { DataVariable dv = new DataVariable(); String varName = variableNameList.get(i); dbgLog.fine("name: " + varName); dv.setName(varName); String varLabel = variableLabelMap.get(varName); if (varLabel != null && varLabel.length() > 255) { // TODO: // variable labels will be changed into type 'TEXT' in the // database - this will eliminate the 255 char. limit. // -- L.A. 4.0 beta11 dbgLog.fine("Have to truncate label: " + varLabel); varLabel = varLabel.substring(0, 255); } dbgLog.fine("label: " + varLabel); dv.setLabel(varLabel); dv.setInvalidRanges(new ArrayList<VariableRange>()); dv.setSummaryStatistics(new ArrayList<SummaryStatistic>()); dv.setUnf("UNF:6:"); dv.setCategories(new ArrayList<VariableCategory>()); variableList.add(dv); dv.setFileOrder(i); dv.setDataTable(dataTable); } dataTable.setDataVariables(variableList); ///smd.setVariableName(variableNameList.toArray(new String[variableNameList.size()])); ///smd.setVariableLabel(variableLabelMap); // TODO: // figure out what to do with the missing value table! // -- 4.0 alpha // well, they were used to generate merged summary statistics for // the variable. So need to verify what the DDI import was doing // with them and replicate the same in 4.0. // (add appropriate value labels?) 
///TODO: 4.0 smd.setMissingValueTable(missingValueTable); ///smd.getFileInformation().put("caseWeightVariableName", caseWeightVariableName); dbgLog.fine("sumstat:long case=" + Arrays.deepToString(variableTypelList.toArray())); dbgLog.fine("RT2: OBSwiseTypelList=" + OBSwiseTypelList); dbgLog.fine("decodeRecordType2(): end"); }