List of usage examples for java.lang.Double.toHexString
public static String toHexString(double d)
From source file:Main.java
/**
 * Prints the hexadecimal floating-point representation of {@code 1.2} to standard output.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    final double sample = 1.2;
    final String hexForm = Double.toHexString(sample);
    System.out.println(hexForm);
}
From source file:com.uber.stream.kafka.mirrormaker.controller.utils.KafkaStarterUtils.java
public static KafkaServerStartable startServer(final int port, final int brokerId, final String zkStr, final Properties configuration) { // Create the ZK nodes for Kafka, if needed int indexOfFirstSlash = zkStr.indexOf('/'); if (indexOfFirstSlash != -1) { String bareZkUrl = zkStr.substring(0, indexOfFirstSlash); String zkNodePath = zkStr.substring(indexOfFirstSlash); ZkClient client = new ZkClient(bareZkUrl); client.createPersistent(zkNodePath, true); client.close();// w ww . j av a 2s . c o m } File logDir = new File("/tmp/kafka-" + Double.toHexString(Math.random())); logDir.mkdirs(); configureKafkaPort(configuration, port); configureZkConnectionString(configuration, zkStr); configureBrokerId(configuration, brokerId); configureKafkaLogDirectory(configuration, logDir); KafkaConfig config = new KafkaConfig(configuration); KafkaServerStartable serverStartable = new KafkaServerStartable(config); serverStartable.startup(); return serverStartable; }
From source file:com.linkedin.pinot.common.utils.KafkaStarterUtils.java
/**
 * Starts {@code brokerCount} Kafka brokers on consecutive ports.
 *
 * <p>Broker {@code n} (counting from zero) uses id {@code n}, port {@code port + n}, and its own
 * randomly named log directory under {@code /tmp}.
 *
 * @param brokerCount the number of brokers to start
 * @param port the port of the first broker
 * @param zkStr the ZooKeeper connection string handed to every broker
 * @param configuration the properties handed to every broker
 * @return the started brokers, in the order they were started
 */
public static List<KafkaServerStartable> startServers(final int brokerCount, final int port, final String zkStr,
    final Properties configuration) {
  final List<KafkaServerStartable> brokers = new ArrayList<>(brokerCount);
  int brokerId = 0;
  while (brokerId < brokerCount) {
    final String logDirPath = "/tmp/kafka-" + Double.toHexString(Math.random());
    final KafkaServerStartable broker = startServer(port + brokerId, brokerId, zkStr, logDirPath, configuration);
    brokers.add(broker);
    brokerId++;
  }
  return brokers;
}
From source file:com.linkedin.pinot.common.utils.KafkaStarterUtils.java
/**
 * Starts a single Kafka broker using a randomly named log directory under {@code /tmp}.
 *
 * <p>Delegates to the overload that takes an explicit log directory path.
 *
 * @param port the broker port
 * @param brokerId the broker id
 * @param zkStr the ZooKeeper connection string
 * @param configuration the broker properties
 * @return whatever the delegate overload returns
 */
public static KafkaServerStartable startServer(final int port, final int brokerId, final String zkStr,
    final Properties configuration) {
  final String randomSuffix = Double.toHexString(Math.random());
  final String logDirPath = "/tmp/kafka-" + randomSuffix;
  return startServer(port, brokerId, zkStr, logDirPath, configuration);
}
From source file:biz.fstechnology.micro.common.jms.RequestMessageCreator.java
/** * @see org.springframework.jms.core.MessageCreator#createMessage(javax.jms.Session) *///from ww w.j a v a 2 s . c o m @Override public Message createMessage(Session session) throws JMSException { ObjectMessage message = session.createObjectMessage(); message.setObject(getContents()); message.setJMSReplyTo(getReplyTo()); if (getRequestId() == null) { setRequestId(Double.toHexString(Math.random())); } message.setJMSCorrelationID(getRequestId()); return message; }
From source file:GeneticAlgorithm.SystemToSolve.java
public void solve_ODE_model(double[] parameters, String mem_address) throws ModelOverdeterminedException, InstantiationException, IllegalAccessException, IllegalArgumentException, NoSuchMethodException, XMLStreamException, IOException { if (SteadyState_OR_TimeCourse_data == true) { //solve model for steady state values for (int i = 0; i < conditions_list.size(); i++) { HashMap Enz_Conc_to_Update = conditions_list.get(i).get_proteins_info(); HashMap BC_true_Met_to_Update = conditions_list.get(i).get_BCTrue_metabolites_info(); if (BC_true_Met_to_Update.size() > 0) { modelreactions.modExtMet(BC_true_Met_to_Update); for (Compound compound : Compounds) { if (compound.getBoundaryCondition() == true) { if (BC_true_Met_to_Update.containsKey(compound.getID()) == true) { compound.setConcentration((double) BC_true_Met_to_Update.get(compound.getID())); }//from w ww . j a v a 2 s .c o m } } } if (Enz_Conc_to_Update.size() > 0) { modelreactions.modEnzymes(Enz_Conc_to_Update); for (ModelReaction rxn : Reactions) { if (Enz_Conc_to_Update.containsKey(rxn.getEnzyme().getID()) == true) { rxn.getEnzyme() .setConcentration((double) Enz_Conc_to_Update.get(rxn.getEnzyme().getID())); } } } String tmpname = mem_address; tmpname += "_" + i + ".xml"; File tmpxml = new File(tmpname); if (tmpxml.exists()) { String rando = Double.toHexString(RN(0.01, 10) * RN(0.01, 10)).replaceAll("\\.", "_"); tmpname += rando; tmpxml = new File(tmpname); } if (BC_true_Met_to_Update.size() > 0 || Enz_Conc_to_Update.size() > 0) { modelreactions.exportsbml(tmpxml); link = tmpname; } else { link = originallink; } sosLIBssSolver steadystateresults = new sosLIBssSolver(Compounds, Reactions, parameters, link); if (steadystateresults.solveSS() == true) { conditions_list.get(i).store_Solved_metmap(steadystateresults.getSolvedMMap()); conditions_list.get(i).store_Solved_fluxmap(steadystateresults.getSolvedFMap()); good_OR_bad_solution_forModel = true; } else { good_OR_bad_solution_forModel = false; } tmpxml.delete(); } 
} else { //solve model for time series output/values double[] time = data.returnTime(); double[] parameter = parameters; int activeCompNum = 0; for (Compound comp : Compounds) { if (comp.getBoundaryCondition() == false) { activeCompNum++; } } String[] paranames = modelreactions.getParameterNames(); String[] reactionID = modelreactions.getReactionID(); String[] variables = new String[activeCompNum]; String[] reactionID2 = modelreactions.getReactionID2(); int count = 0; for (Compound comp : Compounds) { if (comp.getBoundaryCondition() == false) { variables[count] = comp.getID(); count++; } } TCodesolve ode = new TCodesolve(link, variables, getParametersCount(), paranames, reactionID, reactionID2); double output[][] = ode.runsolver(parameter, time); for (int j = 1; j < variables.length + 1 + reactionID2.length; j++) { double[] temparray = new double[time.length]; for (int k = 0; k < time.length; k++) { temparray[k] = output[k][j]; } if ((j - 1) < activeCompNum) { TCestMetMap.put(variables[j - 1], temparray); } else { TCestFluxMap.put(reactionID2[j - activeCompNum - 1], temparray); } } } }
From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.sav.SAVFileReader.java
/**
 * Decodes the sequence of "Record Type 2" (RT2) variable records of a SAV file from
 * {@code stream} and records the resulting variable metadata on this reader.
 *
 * <p>The loop runs at most {@code OBSUnitsPerCase} times. Each iteration reads one fixed-size
 * RT2 header of {@code LENGTH_RECORDTYPE2_FIXED} bytes and interprets its first four integers
 * (byte order chosen by {@code isLittleEndian}):
 * <ol>
 *   <li>record type: must be 2, otherwise the loop is left early;</li>
 *   <li>variable type: 0 = numeric, &gt; 0 = string of that length, -1 = continuation unit of
 *       an earlier string variable (recorded in {@code OBSwiseTypelList} and
 *       {@code obsNonVariableBlockSet}, then skipped);</li>
 *   <li>label flag: 0 or 1, anything else is an error; when 1, a length field and the padded
 *       label bytes are read from the stream;</li>
 *   <li>missing-value type code: when it is non-zero and contained in
 *       {@code validMissingValueCodeSet}, the missing-value units are read and stored in
 *       {@code invalidDataTable} and the local missing-value table.</li>
 * </ol>
 * The variable name is taken from bytes 24 onwards of the fixed header and cut at the first
 * space. String variables of length 255 whose successor names pass
 * {@code variableNameIsAnIncrement} are treated as chunks of one long string: their label bytes
 * are still consumed, but they are not registered as separate variables. Print/write format
 * bytes are decoded into {@code printFormatTable}, {@code printFormatNameTable} and
 * {@code formatDecimalPointPositionList}.
 *
 * <p>When the loop has processed all {@code OBSUnitsPerCase} units, the collected names, labels,
 * missing values, formats and the variable count ({@code varQnty}) are stored on {@code smd}.
 *
 * <p>NOTE(review): every {@code stream.read(...)} result is only compared with 0; an end of
 * stream (-1) or a short read is not detected here — confirm whether callers guarantee
 * fully available input.
 * <p>NOTE(review): exceptions other than {@code IOException} thrown inside the loop body are
 * printed with {@code printStackTrace()} and the loop continues with the next unit.
 * <p>NOTE(review): in this listing the body is collapsed onto a few physical lines, so the
 * {@code //} comments must be put back on their own lines before the method can compile.
 *
 * @param stream the input positioned at the first RT2 record; must not be {@code null}
 * @throws IllegalArgumentException if {@code stream} is {@code null}
 * @throws IOException if a read reports zero bytes, the label flag is neither 0 nor 1, an
 *         unknown format code is found, or the loop ended before {@code OBSUnitsPerCase}
 *         units were processed
 */
void decodeRecordType2(BufferedInputStream stream) throws IOException { dbgLog.fine("***** decodeRecordType2(): start *****"); if (stream == null) { throw new IllegalArgumentException("stream == null!"); }//from w w w. ja va2s . c o m Map<String, String> variableLabelMap = new LinkedHashMap<String, String>(); Map<String, List<String>> missingValueTable = new LinkedHashMap<String, List<String>>(); List<Integer> printFormatList = new ArrayList<Integer>(); String caseWeightVariableName = null; int caseWeightVariableIndex = 0; boolean lastVariableIsExtendable = false; boolean extendedVariableMode = false; boolean obs255 = false; String lastVariableName = null; String lastExtendedVariable = null; // this field repeats as many as the number of variables in // this sav file // (note that the above statement is not technically correct, this // record repeats not just for every variable in the file, but for // every OBS (8 byte unit); i.e., if a string is split into multiple // OBS units, each one will have its own RT2 record -- L.A.). // Each field constists of a fixed (32-byte) segment and // then a few variable segments: // if the variable has a label (3rd INT4 set to 1), then there's 4 more // bytes specifying the length of the label, and then that many bytes // holding the label itself (no more than 256). // Then if there are optional missing value units (4th INT4 set to 1) // there will be 3 more OBS units attached = 24 extra bytes. 
int variableCounter = 0; int obsSeqNumber = 0; int j; dbgLog.fine("RT2: Reading " + OBSUnitsPerCase + " OBS units."); for (j = 0; j < OBSUnitsPerCase; j++) { dbgLog.fine("RT2: \n\n+++++++++++ " + j + "-th RT2 unit is to be decoded +++++++++++"); // 2.0: read the fixed[=non-optional] 32-byte segment byte[] recordType2Fixed = new byte[LENGTH_RECORDTYPE2_FIXED]; try { int nbytes = stream.read(recordType2Fixed, 0, LENGTH_RECORDTYPE2_FIXED); //printHexDump(recordType2Fixed, "recordType2 part 1"); if (nbytes == 0) { throw new IOException("reading recordType2: no bytes read!"); } int offset = 0; // 2.1: create int-view of the bytebuffer for the first 16-byte segment int rt2_1st_4_units = 4; ByteBuffer[] bb_record_type2_fixed_part1 = new ByteBuffer[rt2_1st_4_units]; int[] recordType2FixedPart1 = new int[rt2_1st_4_units]; for (int i = 0; i < rt2_1st_4_units; i++) { bb_record_type2_fixed_part1[i] = ByteBuffer.wrap(recordType2Fixed, offset, LENGTH_SAV_INT_BLOCK); offset += LENGTH_SAV_INT_BLOCK; if (isLittleEndian) { bb_record_type2_fixed_part1[i].order(ByteOrder.LITTLE_ENDIAN); } recordType2FixedPart1[i] = bb_record_type2_fixed_part1[i].getInt(); } dbgLog.fine("recordType2FixedPart=" + ReflectionToStringBuilder.toString(recordType2FixedPart1, ToStringStyle.MULTI_LINE_STYLE)); // 1st ([0]) element must be 2 otherwise no longer Record Type 2 if (recordType2FixedPart1[0] != 2) { dbgLog.info(j + "-th RT header value is no longet RT2! " + recordType2FixedPart1[0]); break; //throw new IOException("RT2 reading error: The current position is no longer Record Type 2"); } dbgLog.fine("variable type[must be 2]=" + recordType2FixedPart1[0]); // 2.3 variable name: 8 byte(space[x20]-padded) // This field is located at the very end of the 32 byte // fixed-size RT2 header (bytes 24-31). 
// We are processing it now, so that // we can make the decision on whether this variable is part // of a compound variable: String RawVariableName = new String( Arrays.copyOfRange(recordType2Fixed, 24, (24 + LENGTH_VARIABLE_NAME)), defaultCharSet); //offset +=LENGTH_VARIABLE_NAME; String variableName = null; if (RawVariableName.indexOf(' ') >= 0) { variableName = RawVariableName.substring(0, RawVariableName.indexOf(' ')); } else { variableName = RawVariableName; } // 2nd ([1]) element: numeric variable = 0 :for string variable // this block indicates its datum-length, i.e, >0 ; // if -1, this RT2 unit is a non-1st RT2 unit for a string variable // whose value is longer than 8 character. boolean isNumericVariable = false; dbgLog.fine("variable type(0: numeric; > 0: String;-1 continue )=" + recordType2FixedPart1[1]); //OBSwiseTypelList.add(recordType2FixedPart1[1]); int HowManyRt2Units = 1; if (recordType2FixedPart1[1] == -1) { dbgLog.fine("this RT2 is an 8 bit continuation chunk of an earlier string variable"); if (obs255) { if (obsSeqNumber < 30) { OBSwiseTypelList.add(recordType2FixedPart1[1]); obsSeqNumber++; } else { OBSwiseTypelList.add(-2); obs255 = false; obsSeqNumber = 0; } } else { OBSwiseTypelList.add(recordType2FixedPart1[1]); } obsNonVariableBlockSet.add(j); continue; } else if (recordType2FixedPart1[1] == 0) { // This is a numeric variable extendedVariableMode = false; // And as such, it cannot be an extension of a // previous, long string variable. OBSwiseTypelList.add(recordType2FixedPart1[1]); variableCounter++; isNumericVariable = true; variableTypelList.add(recordType2FixedPart1[1]); } else if (recordType2FixedPart1[1] > 0) { // This looks like a regular string variable. However, // it may still be a part of a compound variable // (a String > 255 bytes that was split into 255 byte // chunks, stored as individual String variables). 
if (recordType2FixedPart1[1] == 255) { obs255 = true; } if (lastVariableIsExtendable) { String varNameBase = null; if (lastVariableName.length() > 5) { varNameBase = lastVariableName.substring(0, 5); } else { varNameBase = lastVariableName; } if (extendedVariableMode) { if (variableNameIsAnIncrement(varNameBase, lastExtendedVariable, variableName)) { OBSwiseTypelList.add(-1); lastExtendedVariable = variableName; // OK, we stay in the "extended variable" mode; // but we can't move on to the next OBS (hence the commented out // "continue" below: //continue; // see the next comment below for the explanation. // // Should we also set "extendable" flag to false at this point // if it's shorter than 255 bytes, i.e. the last extended chunk? } else { extendedVariableMode = false; } } else { if (variableNameIsAnIncrement(varNameBase, variableName)) { OBSwiseTypelList.add(-1); extendedVariableMode = true; dbgLog.fine("RT2: in extended variable mode; variable " + variableName); lastExtendedVariable = variableName; // Before we move on to the next OBS unit, we need to check // if this current extended variable has its own label specified; // If so, we need to determine its length, then read and skip // that many bytes. // Hence the commented out "continue" below: //continue; } } } if (!extendedVariableMode) { // OK, this is a "real" // string variable, and not a continuation chunk of a compound // string. OBSwiseTypelList.add(recordType2FixedPart1[1]); variableCounter++; if (recordType2FixedPart1[1] == 255) { // This variable is 255 bytes long, i.e. this is // either the single "atomic" variable of the // max allowed size, or it's a 255 byte segment // of a compound variable. So we will check // the next variable and see if it is the continuation // of this one. 
lastVariableIsExtendable = true; } else { lastVariableIsExtendable = false; } if (recordType2FixedPart1[1] % LENGTH_SAV_OBS_BLOCK == 0) { HowManyRt2Units = recordType2FixedPart1[1] / LENGTH_SAV_OBS_BLOCK; } else { HowManyRt2Units = recordType2FixedPart1[1] / LENGTH_SAV_OBS_BLOCK + 1; } variableTypelList.add(recordType2FixedPart1[1]); } } if (!extendedVariableMode) { // Again, we only want to do the following steps for the "real" // variables, not the chunks of split mega-variables: dbgLog.fine("RT2: HowManyRt2Units for this variable=" + HowManyRt2Units); lastVariableName = variableName; // caseWeightVariableOBSIndex starts from 1: 0 is used for does-not-exist cases if (j == (caseWeightVariableOBSIndex - 1)) { caseWeightVariableName = variableName; caseWeightVariableIndex = variableCounter; smd.setCaseWeightVariableName(caseWeightVariableName); smd.getFileInformation().put("caseWeightVariableIndex", caseWeightVariableIndex); } OBSIndexToVariableName.put(j, variableName); //dbgLog.fine("\nvariable name="+variableName+"<-"); dbgLog.fine("RT2: " + j + "-th variable name=" + variableName + "<-"); dbgLog.fine("RT2: raw variable: " + RawVariableName); variableNameList.add(variableName); } // 3rd ([2]) element: = 1 variable-label block follows; 0 = no label // dbgLog.fine("RT: variable label follows?(1:yes; 0: no)=" + recordType2FixedPart1[2]); boolean hasVariableLabel = recordType2FixedPart1[2] == 1 ? 
true : false; if ((recordType2FixedPart1[2] != 0) && (recordType2FixedPart1[2] != 1)) { throw new IOException("RT2: reading error: value is neither 0 or 1" + recordType2FixedPart1[2]); } // 2.4 [optional]The length of a variable label followed: 4-byte int // 3rd element of 2.1 indicates whether this field exists // *** warning: The label block is padded to a multiple of the 4-byte // NOT the raw integer value of this 4-byte block if (hasVariableLabel) { byte[] length_variable_label = new byte[4]; int nbytes_2_4 = stream.read(length_variable_label); if (nbytes_2_4 == 0) { throw new IOException("RT 2: error reading recordType2.4: no bytes read!"); } else { dbgLog.fine("nbytes_2_4=" + nbytes_2_4); } ByteBuffer bb_length_variable_label = ByteBuffer.wrap(length_variable_label, 0, LENGTH_VARIABLE_LABEL); if (isLittleEndian) { bb_length_variable_label.order(ByteOrder.LITTLE_ENDIAN); } int rawVariableLabelLength = bb_length_variable_label.getInt(); dbgLog.fine("rawVariableLabelLength=" + rawVariableLabelLength); int variableLabelLength = getSAVintAdjustedBlockLength(rawVariableLabelLength); dbgLog.fine("RT2: variableLabelLength=" + variableLabelLength); // 2.5 [optional]variable label whose length is found at 2.4 String variableLabel = ""; if (rawVariableLabelLength > 0) { byte[] variable_label = new byte[variableLabelLength]; int nbytes_2_5 = stream.read(variable_label); if (nbytes_2_5 == 0) { throw new IOException("RT 2: error reading recordType2.5: " + variableLabelLength + " bytes requested, no bytes read!"); } else { dbgLog.fine("nbytes_2_5=" + nbytes_2_5); } variableLabel = new String(Arrays.copyOfRange(variable_label, 0, rawVariableLabelLength), defaultCharSet); dbgLog.fine("RT2: variableLabel=" + variableLabel + "<-"); dbgLog.info(variableName + " => " + variableLabel); } else { dbgLog.fine("RT2: defaulting to empty variable label."); } if (!extendedVariableMode) { // We only have any use for this label if it's a "real" variable. 
// Thinking about it, it doesn't make much sense for the "fake" // variables that are actually chunks of large strings to store // their own labels. But in some files they do. Then failing to read // the bytes would result in getting out of sync with the RT record // borders. So we always read the bytes, but only use them for // the real variable entries. /*String variableLabel = new String(Arrays.copyOfRange(variable_label, 0, rawVariableLabelLength),"US-ASCII");*/ variableLabelMap.put(variableName, variableLabel); } } if (extendedVariableMode) { // there's nothing else left for us to do in this iteration of the loop. // Once again, this was not a real variable, but a dummy variable entry // created for a chunk of a string variable longer than 255 bytes -- // that's how SPSS stores them. continue; } // 4th ([3]) element: Missing value type code // 0[none], 1, 2, 3 [point-type],-2[range], -3 [range type+ point] dbgLog.fine("RT: missing value unit follows?(if 0, none)=" + recordType2FixedPart1[3]); boolean hasMissingValues = (validMissingValueCodeSet.contains(recordType2FixedPart1[3]) && (recordType2FixedPart1[3] != 0)) ? true : false; InvalidData invalidDataInfo = null; if (recordType2FixedPart1[3] != 0) { invalidDataInfo = new InvalidData(recordType2FixedPart1[3]); dbgLog.fine("RT: missing value type=" + invalidDataInfo.getType()); } // 2.2: print/write formats: 4-byte each = 8 bytes byte[] printFormt = Arrays.copyOfRange(recordType2Fixed, offset, offset + LENGTH_PRINT_FORMAT_CODE); dbgLog.fine("printFrmt=" + new String(Hex.encodeHex(printFormt))); offset += LENGTH_PRINT_FORMAT_CODE; int formatCode = isLittleEndian ? printFormt[2] : printFormt[1]; int formatWidth = isLittleEndian ? printFormt[1] : printFormt[2]; int formatDecimalPointPosition = isLittleEndian ? 
printFormt[0] : printFormt[3]; dbgLog.fine("RT2: format code{5=F, 1=A[String]}=" + formatCode); formatDecimalPointPositionList.add(formatDecimalPointPosition); if (!SPSSConstants.FORMAT_CODE_TABLE_SAV.containsKey(formatCode)) { throw new IOException("Unknown format code was found = " + formatCode); } else { printFormatList.add(formatCode); } byte[] writeFormt = Arrays.copyOfRange(recordType2Fixed, offset, offset + LENGTH_WRITE_FORMAT_CODE); dbgLog.fine("RT2: writeFrmt=" + new String(Hex.encodeHex(writeFormt))); if (writeFormt[3] != 0x00) { dbgLog.fine("byte-order(write format): reversal required"); } offset += LENGTH_WRITE_FORMAT_CODE; if (!SPSSConstants.ORDINARY_FORMAT_CODE_SET.contains(formatCode)) { StringBuilder sb = new StringBuilder( SPSSConstants.FORMAT_CODE_TABLE_SAV.get(formatCode) + formatWidth); if (formatDecimalPointPosition > 0) { sb.append("." + formatDecimalPointPosition); } dbgLog.info("formattable[i] = " + variableName + " -> " + sb.toString()); printFormatNameTable.put(variableName, sb.toString()); } printFormatTable.put(variableName, SPSSConstants.FORMAT_CODE_TABLE_SAV.get(formatCode)); // 2.6 [optional] missing values:4-byte each if exists // 4th element of 2.1 indicates the structure of this sub-field // Should we perhaps check for this for the "fake" variables too? // if (hasMissingValues) { dbgLog.fine("RT2: decoding missing value: type=" + recordType2FixedPart1[3]); int howManyMissingValueUnits = missingValueCodeUnits.get(recordType2FixedPart1[3]); //int howManyMissingValueUnits = recordType2FixedPart1[3] > 0 ? 
recordType2FixedPart1[3] : 0; dbgLog.fine("RT2: howManyMissingValueUnits=" + howManyMissingValueUnits); byte[] missing_value_code_units = new byte[LENGTH_SAV_OBS_BLOCK * howManyMissingValueUnits]; int nbytes_2_6 = stream.read(missing_value_code_units); if (nbytes_2_6 == 0) { throw new IOException("RT 2: reading recordType2.6: no byte was read"); } else { dbgLog.fine("nbytes_2_6=" + nbytes_2_6); } //printHexDump(missing_value_code_units, "missing value"); if (isNumericVariable) { double[] missingValues = new double[howManyMissingValueUnits]; //List<String> mvp = new ArrayList<String>(); List<String> mv = new ArrayList<String>(); ByteBuffer[] bb_missig_value_code = new ByteBuffer[howManyMissingValueUnits]; int offset_start = 0; for (int i = 0; i < howManyMissingValueUnits; i++) { bb_missig_value_code[i] = ByteBuffer.wrap(missing_value_code_units, offset_start, LENGTH_SAV_OBS_BLOCK); offset_start += LENGTH_SAV_OBS_BLOCK; if (isLittleEndian) { bb_missig_value_code[i].order(ByteOrder.LITTLE_ENDIAN); } ByteBuffer temp = bb_missig_value_code[i].duplicate(); missingValues[i] = bb_missig_value_code[i].getDouble(); if (Double.toHexString(missingValues[i]).equals("-0x1.ffffffffffffep1023")) { dbgLog.fine("1st value is LOWEST"); mv.add(Double.toHexString(missingValues[i])); } else if (Double.valueOf(missingValues[i]).equals(Double.MAX_VALUE)) { dbgLog.fine("2nd value is HIGHEST"); mv.add(Double.toHexString(missingValues[i])); } else { mv.add(doubleNumberFormatter.format(missingValues[i])); } dbgLog.fine(i + "-th missing value=" + Double.toHexString(missingValues[i])); } dbgLog.fine("variableName=" + variableName); if (recordType2FixedPart1[3] > 0) { // point cases only dbgLog.fine("mv(>0)=" + mv); missingValueTable.put(variableName, mv); invalidDataInfo.setInvalidValues(mv); } else if (recordType2FixedPart1[3] == -2) { dbgLog.fine("mv(-2)=" + mv); // range invalidDataInfo.setInvalidRange(mv); } else if (recordType2FixedPart1[3] == -3) { // mixed case dbgLog.fine("mv(-3)=" + 
mv); invalidDataInfo.setInvalidRange(mv.subList(0, 2)); invalidDataInfo.setInvalidValues(mv.subList(2, 3)); missingValueTable.put(variableName, mv.subList(2, 3)); } dbgLog.fine("missing value=" + StringUtils.join(missingValueTable.get(variableName), "|")); dbgLog.fine("invalidDataInfo(Numeric):\n" + invalidDataInfo); invalidDataTable.put(variableName, invalidDataInfo); } else { // string variable case String[] missingValues = new String[howManyMissingValueUnits]; List<String> mv = new ArrayList<String>(); int offset_start = 0; int offset_end = LENGTH_SAV_OBS_BLOCK; for (int i = 0; i < howManyMissingValueUnits; i++) { missingValues[i] = StringUtils.stripEnd(new String( Arrays.copyOfRange(missing_value_code_units, offset_start, offset_end), defaultCharSet), " "); dbgLog.fine("missing value=" + missingValues[i] + "<-"); offset_start = offset_end; offset_end += LENGTH_SAV_OBS_BLOCK; mv.add(missingValues[i]); } invalidDataInfo.setInvalidValues(mv); missingValueTable.put(variableName, mv); invalidDataTable.put(variableName, invalidDataInfo); dbgLog.fine( "missing value(str)=" + StringUtils.join(missingValueTable.get(variableName), "|")); dbgLog.fine("invalidDataInfo(String):\n" + invalidDataInfo); } // string case dbgLog.fine("invalidDataTable:\n" + invalidDataTable); } // if msv } catch (IOException ex) { //ex.printStackTrace(); throw ex; } catch (Exception ex) { ex.printStackTrace(); // should we be throwing some exception here? 
} } // j-loop if (j == OBSUnitsPerCase) { dbgLog.fine("RT2 metadata-related exit-chores"); smd.getFileInformation().put("varQnty", variableCounter); varQnty = variableCounter; dbgLog.fine("RT2: varQnty=" + varQnty); smd.setVariableName(variableNameList.toArray(new String[variableNameList.size()])); smd.setVariableLabel(variableLabelMap); smd.setMissingValueTable(missingValueTable); smd.getFileInformation().put("caseWeightVariableName", caseWeightVariableName); dbgLog.info("sumstat:long case=" + Arrays.deepToString(variableTypelList.toArray())); smd.setVariableFormat(printFormatList); smd.setVariableFormatName(printFormatNameTable); dbgLog.info("<<<<<<"); dbgLog.info("printFormatList = " + printFormatList); dbgLog.info("printFormatNameTable = " + printFormatNameTable); // dbgLog.info("formatCategoryTable = " + formatCategoryTable); dbgLog.info(">>>>>>"); dbgLog.fine("RT2: OBSwiseTypelList=" + OBSwiseTypelList); // variableType is determined after the valueTable is finalized } else { dbgLog.info("RT2: attention! didn't reach the end of the OBS list!"); throw new IOException("RT2: didn't reach the end of the OBS list!"); } dbgLog.fine("***** decodeRecordType2(): end *****"); }
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SAVFileReader.java
void decodeRecordType2(BufferedInputStream stream) throws IOException { dbgLog.fine("decodeRecordType2(): start"); if (stream == null) { throw new IllegalArgumentException("stream == null!"); }//from ww w. j a va 2 s . c o m Map<String, String> printFormatNameTable = new LinkedHashMap<String, String>(); Map<String, String> variableLabelMap = new LinkedHashMap<String, String>(); Map<String, List<String>> missingValueTable = new LinkedHashMap<String, List<String>>(); List<Integer> printFormatList = new ArrayList<Integer>(); String caseWeightVariableName = null; int caseWeightVariableIndex = 0; boolean lastVariableIsExtendable = false; boolean extendedVariableMode = false; boolean obs255 = false; String lastVariableName = null; String lastExtendedVariable = null; // this field repeats as many as the number of variables in // this sav file // (note that the above statement is not technically correct, this // record repeats not just for every variable in the file, but for // every OBS (8 byte unit); i.e., if a string is split into multiple // OBS units, each one will have its own RT2 record -- L.A.). // Each field constists of a fixed (32-byte) segment and // then a few variable segments: // if the variable has a label (3rd INT4 set to 1), then there's 4 more // bytes specifying the length of the label, and then that many bytes // holding the label itself (no more than 256). // Then if there are optional missing value units (4th INT4 set to 1) // there will be 3 more OBS units attached = 24 extra bytes. 
int variableCounter = 0; int obsSeqNumber = 0; int j; dbgLog.fine("RT2: Reading " + OBSUnitsPerCase + " OBS units."); for (j = 0; j < OBSUnitsPerCase; j++) { dbgLog.fine("RT2: " + j + "-th RT2 unit is being decoded."); // 2.0: read the fixed[=non-optional] 32-byte segment byte[] recordType2Fixed = new byte[LENGTH_RECORDTYPE2_FIXED]; try { int nbytes = stream.read(recordType2Fixed, 0, LENGTH_RECORDTYPE2_FIXED); //printHexDump(recordType2Fixed, "recordType2 part 1"); if (nbytes == 0) { throw new IOException("reading recordType2: no bytes read!"); } int offset = 0; // 2.1: create int-view of the bytebuffer for the first 16-byte segment int rt2_1st_4_units = 4; ByteBuffer[] bb_record_type2_fixed_part1 = new ByteBuffer[rt2_1st_4_units]; int[] recordType2FixedPart1 = new int[rt2_1st_4_units]; for (int i = 0; i < rt2_1st_4_units; i++) { bb_record_type2_fixed_part1[i] = ByteBuffer.wrap(recordType2Fixed, offset, LENGTH_SAV_INT_BLOCK); offset += LENGTH_SAV_INT_BLOCK; if (isLittleEndian) { bb_record_type2_fixed_part1[i].order(ByteOrder.LITTLE_ENDIAN); } recordType2FixedPart1[i] = bb_record_type2_fixed_part1[i].getInt(); } ///dbgLog.fine("recordType2FixedPart="+ /// ReflectionToStringBuilder.toString(recordType2FixedPart1, ToStringStyle.MULTI_LINE_STYLE)); // 1st ([0]) element must be 2 otherwise no longer Record Type 2 if (recordType2FixedPart1[0] != 2) { dbgLog.warning(j + "-th RT header value is no longet RT2! " + recordType2FixedPart1[0]); break; } dbgLog.fine("variable type[must be 2]=" + recordType2FixedPart1[0]); // 2.3 variable name: 8 byte(space[x20]-padded) // This field is located at the very end of the 32 byte // fixed-size RT2 header (bytes 24-31). 
// We are processing it now, so that // we can make the decision on whether this variable is part // of a compound variable: String RawVariableName = getNullStrippedString(new String( Arrays.copyOfRange(recordType2Fixed, 24, (24 + LENGTH_VARIABLE_NAME)), defaultCharSet)); //offset +=LENGTH_VARIABLE_NAME; String variableName = null; if (RawVariableName.indexOf(' ') >= 0) { variableName = RawVariableName.substring(0, RawVariableName.indexOf(' ')); } else { variableName = RawVariableName; } // 2nd ([1]) element: numeric variable = 0 :for string variable // this block indicates its datum-length, i.e, >0 ; // if -1, this RT2 unit is a non-1st RT2 unit for a string variable // whose value is longer than 8 character. boolean isNumericVariable = false; dbgLog.fine("variable type(0: numeric; > 0: String;-1 continue )=" + recordType2FixedPart1[1]); //OBSwiseTypelList.add(recordType2FixedPart1[1]); int HowManyRt2Units = 1; if (recordType2FixedPart1[1] == -1) { dbgLog.fine("this RT2 is an 8 bit continuation chunk of an earlier string variable"); if (obs255) { if (obsSeqNumber < 30) { OBSwiseTypelList.add(recordType2FixedPart1[1]); obsSeqNumber++; } else { OBSwiseTypelList.add(-2); obs255 = false; obsSeqNumber = 0; } } else { OBSwiseTypelList.add(recordType2FixedPart1[1]); } obsNonVariableBlockSet.add(j); continue; } else if (recordType2FixedPart1[1] == 0) { // This is a numeric variable extendedVariableMode = false; // And as such, it cannot be an extension of a // previous, long string variable. OBSwiseTypelList.add(recordType2FixedPart1[1]); variableCounter++; isNumericVariable = true; variableTypelList.add(recordType2FixedPart1[1]); } else if (recordType2FixedPart1[1] > 0) { // This looks like a regular string variable. However, // it may still be a part of a compound variable // (a String > 255 bytes that was split into 255 byte // chunks, stored as individual String variables). 
if (recordType2FixedPart1[1] == 255) { obs255 = true; } if (lastVariableIsExtendable) { String varNameBase = null; if (lastVariableName.length() > 5) { varNameBase = lastVariableName.substring(0, 5); } else { varNameBase = lastVariableName; } if (extendedVariableMode) { if (variableNameIsAnIncrement(varNameBase, lastExtendedVariable, variableName)) { OBSwiseTypelList.add(-1); lastExtendedVariable = variableName; // OK, we stay in the "extended variable" mode; // but we can't move on to the next OBS (hence the commented out // "continue" below: //continue; // see the next comment below for the explanation. // // Should we also set "extendable" flag to false at this point // if it's shorter than 255 bytes, i.e. the last extended chunk? } else { extendedVariableMode = false; } } else { if (variableNameIsAnIncrement(varNameBase, variableName)) { OBSwiseTypelList.add(-1); extendedVariableMode = true; dbgLog.fine("RT2: in extended variable mode; variable " + variableName); lastExtendedVariable = variableName; // Before we move on to the next OBS unit, we need to check // if this current extended variable has its own label specified; // If so, we need to determine its length, then read and skip // that many bytes. // Hence the commented out "continue" below: //continue; } } } if (!extendedVariableMode) { // OK, this is a "real" // string variable, and not a continuation chunk of a compound // string. OBSwiseTypelList.add(recordType2FixedPart1[1]); variableCounter++; if (recordType2FixedPart1[1] == 255) { // This variable is 255 bytes long, i.e. this is // either the single "atomic" variable of the // max allowed size, or it's a 255 byte segment // of a compound variable. So we will check // the next variable and see if it is the continuation // of this one. 
lastVariableIsExtendable = true; } else { lastVariableIsExtendable = false; } if (recordType2FixedPart1[1] % LENGTH_SAV_OBS_BLOCK == 0) { HowManyRt2Units = recordType2FixedPart1[1] / LENGTH_SAV_OBS_BLOCK; } else { HowManyRt2Units = recordType2FixedPart1[1] / LENGTH_SAV_OBS_BLOCK + 1; } variableTypelList.add(recordType2FixedPart1[1]); } } if (!extendedVariableMode) { // Again, we only want to do the following steps for the "real" // variables, not the chunks of split mega-variables: dbgLog.fine("RT2: HowManyRt2Units for this variable=" + HowManyRt2Units); lastVariableName = variableName; // caseWeightVariableOBSIndex starts from 1: 0 is used for does-not-exist cases if (j == (caseWeightVariableOBSIndex - 1)) { caseWeightVariableName = variableName; // TODO: do we need this "index"? -- 4.0 alpha caseWeightVariableIndex = variableCounter; ///smd.setCaseWeightVariableName(caseWeightVariableName); ///smd.getFileInformation().put("caseWeightVariableIndex", caseWeightVariableIndex); } OBSIndexToVariableName.put(j, variableName); //dbgLog.fine("\nvariable name="+variableName+"<-"); dbgLog.fine("RT2: " + j + "-th variable name=" + variableName + "<-"); dbgLog.fine("RT2: raw variable: " + RawVariableName); variableNameList.add(variableName); } // 3rd ([2]) element: = 1 variable-label block follows; 0 = no label // dbgLog.fine("RT: variable label follows?(1:yes; 0: no)=" + recordType2FixedPart1[2]); boolean hasVariableLabel = recordType2FixedPart1[2] == 1 ? 
true : false; if ((recordType2FixedPart1[2] != 0) && (recordType2FixedPart1[2] != 1)) { throw new IOException("RT2: reading error: value is neither 0 or 1" + recordType2FixedPart1[2]); } // 2.4 [optional]The length of a variable label followed: 4-byte int // 3rd element of 2.1 indicates whether this field exists // *** warning: The label block is padded to a multiple of the 4-byte // NOT the raw integer value of this 4-byte block if (hasVariableLabel) { byte[] length_variable_label = new byte[4]; int nbytes_2_4 = stream.read(length_variable_label); if (nbytes_2_4 == 0) { throw new IOException("RT 2: error reading recordType2.4: no bytes read!"); } else { dbgLog.fine("nbytes_2_4=" + nbytes_2_4); } ByteBuffer bb_length_variable_label = ByteBuffer.wrap(length_variable_label, 0, LENGTH_VARIABLE_LABEL); if (isLittleEndian) { bb_length_variable_label.order(ByteOrder.LITTLE_ENDIAN); } int rawVariableLabelLength = bb_length_variable_label.getInt(); dbgLog.fine("rawVariableLabelLength=" + rawVariableLabelLength); int variableLabelLength = getSAVintAdjustedBlockLength(rawVariableLabelLength); dbgLog.fine("RT2: variableLabelLength=" + variableLabelLength); // 2.5 [optional]variable label whose length is found at 2.4 String variableLabel = ""; if (rawVariableLabelLength > 0) { byte[] variable_label = new byte[variableLabelLength]; int nbytes_2_5 = stream.read(variable_label); if (nbytes_2_5 == 0) { throw new IOException("RT 2: error reading recordType2.5: " + variableLabelLength + " bytes requested, no bytes read!"); } else { dbgLog.fine("nbytes_2_5=" + nbytes_2_5); } variableLabel = getNullStrippedString(new String( Arrays.copyOfRange(variable_label, 0, rawVariableLabelLength), defaultCharSet)); dbgLog.fine("RT2: variableLabel=" + variableLabel + "<-"); dbgLog.fine(variableName + " => " + variableLabel); } else { dbgLog.fine("RT2: defaulting to empty variable label."); } if (!extendedVariableMode) { // We only have any use for this label if it's a "real" variable. 
// Thinking about it, it doesn't make much sense for the "fake" // variables that are actually chunks of large strings to store // their own labels. But in some files they do. Then failing to read // the bytes would result in getting out of sync with the RT record // borders. So we always read the bytes, but only use them for // the real variable entries. /*String variableLabel = new String(Arrays.copyOfRange(variable_label, 0, rawVariableLabelLength),"US-ASCII");*/ variableLabelMap.put(variableName, variableLabel); } } if (extendedVariableMode) { // there's nothing else left for us to do in this iteration of the loop. // Once again, this was not a real variable, but a dummy variable entry // created for a chunk of a string variable longer than 255 bytes -- // that's how SPSS stores them. continue; } // 4th ([3]) element: Missing value type code // 0[none], 1, 2, 3 [point-type],-2[range], -3 [range type+ point] dbgLog.fine("RT: missing value unit follows?(if 0, none)=" + recordType2FixedPart1[3]); boolean hasMissingValues = (validMissingValueCodeSet.contains(recordType2FixedPart1[3]) && (recordType2FixedPart1[3] != 0)) ? true : false; InvalidData invalidDataInfo = null; if (recordType2FixedPart1[3] != 0) { invalidDataInfo = new InvalidData(recordType2FixedPart1[3]); dbgLog.fine("RT: missing value type=" + invalidDataInfo.getType()); } // 2.2: print/write formats: 4-byte each = 8 bytes byte[] printFormt = Arrays.copyOfRange(recordType2Fixed, offset, offset + LENGTH_PRINT_FORMAT_CODE); dbgLog.fine("printFrmt=" + new String(Hex.encodeHex(printFormt))); offset += LENGTH_PRINT_FORMAT_CODE; int formatCode = isLittleEndian ? printFormt[2] : printFormt[1]; int formatWidth = isLittleEndian ? printFormt[1] : printFormt[2]; // TODO: // What should we be doing with these "format decimal positions" // in 4.0? // -- L.A. 4.0 alpha int formatDecimalPointPosition = isLittleEndian ? 
printFormt[0] : printFormt[3]; dbgLog.fine("RT2: format code{5=F, 1=A[String]}=" + formatCode); formatDecimalPointPositionList.add(formatDecimalPointPosition); if (!SPSSConstants.FORMAT_CODE_TABLE_SAV.containsKey(formatCode)) { throw new IOException("Unknown format code was found = " + formatCode); } else { printFormatList.add(formatCode); } byte[] writeFormt = Arrays.copyOfRange(recordType2Fixed, offset, offset + LENGTH_WRITE_FORMAT_CODE); dbgLog.fine("RT2: writeFrmt=" + new String(Hex.encodeHex(writeFormt))); if (writeFormt[3] != 0x00) { dbgLog.fine("byte-order(write format): reversal required"); } offset += LENGTH_WRITE_FORMAT_CODE; if (!SPSSConstants.ORDINARY_FORMAT_CODE_SET.contains(formatCode)) { StringBuilder sb = new StringBuilder( SPSSConstants.FORMAT_CODE_TABLE_SAV.get(formatCode) + formatWidth); if (formatDecimalPointPosition > 0) { sb.append("." + formatDecimalPointPosition); } dbgLog.fine("formattable[i] = " + variableName + " -> " + sb.toString()); printFormatNameTable.put(variableName, sb.toString()); } printFormatTable.put(variableName, SPSSConstants.FORMAT_CODE_TABLE_SAV.get(formatCode)); // 2.6 [optional] missing values:4-byte each if exists // 4th element of 2.1 indicates the structure of this sub-field // Should we perhaps check for this for the "fake" variables too? // if (hasMissingValues) { dbgLog.fine("RT2: decoding missing value: type=" + recordType2FixedPart1[3]); int howManyMissingValueUnits = missingValueCodeUnits.get(recordType2FixedPart1[3]); //int howManyMissingValueUnits = recordType2FixedPart1[3] > 0 ? 
recordType2FixedPart1[3] : 0; dbgLog.fine("RT2: howManyMissingValueUnits=" + howManyMissingValueUnits); byte[] missing_value_code_units = new byte[LENGTH_SAV_OBS_BLOCK * howManyMissingValueUnits]; int nbytes_2_6 = stream.read(missing_value_code_units); if (nbytes_2_6 == 0) { throw new IOException("RT 2: reading recordType2.6: no byte was read"); } else { dbgLog.fine("nbytes_2_6=" + nbytes_2_6); } //printHexDump(missing_value_code_units, "missing value"); if (isNumericVariable) { double[] missingValues = new double[howManyMissingValueUnits]; //List<String> mvp = new ArrayList<String>(); List<String> mv = new ArrayList<String>(); ByteBuffer[] bb_missig_value_code = new ByteBuffer[howManyMissingValueUnits]; int offset_start = 0; for (int i = 0; i < howManyMissingValueUnits; i++) { bb_missig_value_code[i] = ByteBuffer.wrap(missing_value_code_units, offset_start, LENGTH_SAV_OBS_BLOCK); offset_start += LENGTH_SAV_OBS_BLOCK; if (isLittleEndian) { bb_missig_value_code[i].order(ByteOrder.LITTLE_ENDIAN); } ByteBuffer temp = bb_missig_value_code[i].duplicate(); missingValues[i] = bb_missig_value_code[i].getDouble(); if (Double.toHexString(missingValues[i]).equals("-0x1.ffffffffffffep1023")) { dbgLog.fine("1st value is LOWEST"); mv.add(Double.toHexString(missingValues[i])); } else if (Double.valueOf(missingValues[i]).equals(Double.MAX_VALUE)) { dbgLog.fine("2nd value is HIGHEST"); mv.add(Double.toHexString(missingValues[i])); } else { mv.add(doubleNumberFormatter.format(missingValues[i])); } dbgLog.fine(i + "-th missing value=" + Double.toHexString(missingValues[i])); } dbgLog.fine("variableName=" + variableName); if (recordType2FixedPart1[3] > 0) { // point cases only dbgLog.fine("mv(>0)=" + mv); missingValueTable.put(variableName, mv); invalidDataInfo.setInvalidValues(mv); } else if (recordType2FixedPart1[3] == -2) { dbgLog.fine("mv(-2)=" + mv); // range invalidDataInfo.setInvalidRange(mv); } else if (recordType2FixedPart1[3] == -3) { // mixed case dbgLog.fine("mv(-3)=" + 
mv); invalidDataInfo.setInvalidRange(mv.subList(0, 2)); invalidDataInfo.setInvalidValues(mv.subList(2, 3)); missingValueTable.put(variableName, mv.subList(2, 3)); } dbgLog.fine("missing value=" + StringUtils.join(missingValueTable.get(variableName), "|")); dbgLog.fine("invalidDataInfo(Numeric):\n" + invalidDataInfo); invalidDataTable.put(variableName, invalidDataInfo); } else { // string variable case String[] missingValues = new String[howManyMissingValueUnits]; List<String> mv = new ArrayList<String>(); int offset_start = 0; int offset_end = LENGTH_SAV_OBS_BLOCK; for (int i = 0; i < howManyMissingValueUnits; i++) { missingValues[i] = StringUtils.stripEnd(new String( Arrays.copyOfRange(missing_value_code_units, offset_start, offset_end), defaultCharSet), " "); dbgLog.fine("missing value=" + missingValues[i] + "<-"); offset_start = offset_end; offset_end += LENGTH_SAV_OBS_BLOCK; mv.add(missingValues[i]); } invalidDataInfo.setInvalidValues(mv); missingValueTable.put(variableName, mv); invalidDataTable.put(variableName, invalidDataInfo); dbgLog.fine( "missing value(str)=" + StringUtils.join(missingValueTable.get(variableName), "|")); dbgLog.fine("invalidDataInfo(String):\n" + invalidDataInfo); } // string case dbgLog.fine("invalidDataTable:\n" + invalidDataTable); } // if msv } catch (IOException ex) { //ex.printStackTrace(); throw ex; } catch (Exception ex) { ex.printStackTrace(); // should we be throwing some exception here? } } // j-loop if (j != OBSUnitsPerCase) { dbgLog.fine("RT2: attention! 
didn't reach the end of the OBS list!"); throw new IOException("RT2: didn't reach the end of the OBS list!"); } dbgLog.fine("RT2 metadata-related exit-chores"); ///smd.getFileInformation().put("varQnty", variableCounter); dataTable.setVarQuantity(new Long(variableCounter)); dbgLog.fine("RT2: varQnty=" + variableCounter); // 4.0 Initialize variables: List<DataVariable> variableList = new ArrayList<DataVariable>(); for (int i = 0; i < variableCounter; i++) { DataVariable dv = new DataVariable(); String varName = variableNameList.get(i); dbgLog.fine("name: " + varName); dv.setName(varName); String varLabel = variableLabelMap.get(varName); if (varLabel != null && varLabel.length() > 255) { // TODO: // variable labels will be changed into type 'TEXT' in the // database - this will eliminate the 255 char. limit. // -- L.A. 4.0 beta11 dbgLog.fine("Have to truncate label: " + varLabel); varLabel = varLabel.substring(0, 255); } dbgLog.fine("label: " + varLabel); dv.setLabel(varLabel); dv.setInvalidRanges(new ArrayList<VariableRange>()); dv.setSummaryStatistics(new ArrayList<SummaryStatistic>()); dv.setUnf("UNF:6:"); dv.setCategories(new ArrayList<VariableCategory>()); variableList.add(dv); dv.setFileOrder(i); dv.setDataTable(dataTable); } dataTable.setDataVariables(variableList); ///smd.setVariableName(variableNameList.toArray(new String[variableNameList.size()])); ///smd.setVariableLabel(variableLabelMap); // TODO: // figure out what to do with the missing value table! // -- 4.0 alpha // well, they were used to generate merged summary statistics for // the variable. So need to verify what the DDI import was doing // with them and replicate the same in 4.0. // (add appropriate value labels?) 
///TODO: 4.0 smd.setMissingValueTable(missingValueTable); ///smd.getFileInformation().put("caseWeightVariableName", caseWeightVariableName); dbgLog.fine("sumstat:long case=" + Arrays.deepToString(variableTypelList.toArray())); dbgLog.fine("RT2: OBSwiseTypelList=" + OBSwiseTypelList); dbgLog.fine("decodeRecordType2(): end"); }
From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.sav.SAVFileReader.java
void decodeRecordType7(BufferedInputStream stream) throws IOException { dbgLog.fine("***** decodeRecordType7(): start *****"); int counter = 0; int[] headerSection = new int[2]; // the variables below may no longer needed; // but they may be useful for debugging/logging purposes. /// // RecordType 7 /// // Subtype 3 /// List<Integer> releaseMachineSpecificInfo = new ArrayList<Integer>(); /// List<String> releaseMachineSpecificInfoHex = new ArrayList<String>(); /// // Subytpe 4 /// Map<String, Double> OBSTypeValue = new LinkedHashMap<String, Double>(); /// Map<String, String> OBSTypeHexValue = new LinkedHashMap<String, String>(); //Subtype 11//from w w w . j a v a 2s. com /// List<Integer> measurementLevel = new ArrayList<Integer>(); /// List<Integer> columnWidth = new ArrayList<Integer>(); /// List<Integer> alignment = new ArrayList<Integer>(); Map<String, String> shortToLongVarialbeNameTable = new LinkedHashMap<String, String>(); while (true) { try { if (stream == null) { throw new IllegalArgumentException("RT7: stream == null!"); } // first check the 4-byte header value //if (stream.markSupported()){ stream.mark(1000); //} // 7.0 check the first 4 bytes byte[] headerCodeRt7 = new byte[LENGTH_RECORD_TYPE7_CODE]; int nbytes_rt7 = stream.read(headerCodeRt7, 0, LENGTH_RECORD_TYPE7_CODE); // to-do check against nbytes //printHexDump(headerCodeRt7, "RT7 header test"); ByteBuffer bb_header_code_rt7 = ByteBuffer.wrap(headerCodeRt7, 0, LENGTH_RECORD_TYPE7_CODE); if (isLittleEndian) { bb_header_code_rt7.order(ByteOrder.LITTLE_ENDIAN); } int intRT7test = bb_header_code_rt7.getInt(); dbgLog.fine("RT7: header test value=" + intRT7test); if (intRT7test != 7) { //if (stream.markSupported()){ //out.print("iteration="+safteyCounter); //dbgLog.fine("iteration="+safteyCounter); dbgLog.fine("intRT7test failed=" + intRT7test); dbgLog.fine("counter=" + counter); stream.reset(); return; //} } // 7.1 check 4-byte integer Sub-Type Code byte[] length_sub_type_code = new 
byte[LENGTH_RT7_SUB_TYPE_CODE]; int nbytes_rt7_1 = stream.read(length_sub_type_code, 0, LENGTH_RT7_SUB_TYPE_CODE); // to-do check against nbytes //printHexDump(length_how_many_line_bytes, "RT7 how_many_line_bytes"); ByteBuffer bb_sub_type_code = ByteBuffer.wrap(length_sub_type_code, 0, LENGTH_RT7_SUB_TYPE_CODE); if (isLittleEndian) { bb_sub_type_code.order(ByteOrder.LITTLE_ENDIAN); } int subTypeCode = bb_sub_type_code.getInt(); dbgLog.fine("RT7: subTypeCode=" + subTypeCode); switch (subTypeCode) { case 3: // 3: Release andMachine-Specific Integer Information //parseRT7SubTypefield(stream); headerSection = parseRT7SubTypefieldHeader(stream); if (headerSection != null) { int unitLength = headerSection[0]; int numberOfUnits = headerSection[1]; for (int i = 0; i < numberOfUnits; i++) { dbgLog.finer(i + "-th fieldData"); byte[] work = new byte[unitLength]; int nb = stream.read(work); dbgLog.finer("raw bytes in Hex:" + new String(Hex.encodeHex(work))); ByteBuffer bb_field = ByteBuffer.wrap(work); if (isLittleEndian) { bb_field.order(ByteOrder.LITTLE_ENDIAN); } String dataInHex = new String(Hex.encodeHex(bb_field.array())); /// releaseMachineSpecificInfoHex.add(dataInHex); dbgLog.finer("raw bytes in Hex:" + dataInHex); if (unitLength == 4) { int fieldData = bb_field.getInt(); dbgLog.finer("fieldData(int)=" + fieldData); dbgLog.finer("fieldData in Hex=0x" + Integer.toHexString(fieldData)); /// releaseMachineSpecificInfo.add(fieldData); } } /// dbgLog.fine("releaseMachineSpecificInfo="+releaseMachineSpecificInfo); /// dbgLog.fine("releaseMachineSpecificInfoHex="+releaseMachineSpecificInfoHex); } else { // throw new IOException } dbgLog.fine("***** end of subType 3 ***** \n"); break; case 4: // Release andMachine-SpecificOBS-Type Information headerSection = parseRT7SubTypefieldHeader(stream); if (headerSection != null) { int unitLength = headerSection[0]; int numberOfUnits = headerSection[1]; for (int i = 0; i < numberOfUnits; i++) { dbgLog.finer(i + "-th fieldData:" + 
RecordType7SubType4Fields.get(i)); byte[] work = new byte[unitLength]; int nb = stream.read(work); dbgLog.finer("raw bytes in Hex:" + new String(Hex.encodeHex(work))); ByteBuffer bb_field = ByteBuffer.wrap(work); dbgLog.finer("byte order=" + bb_field.order().toString()); if (isLittleEndian) { bb_field.order(ByteOrder.LITTLE_ENDIAN); } ByteBuffer bb_field_dup = bb_field.duplicate(); OBSTypeHexValue.put(RecordType7SubType4Fields.get(i), new String(Hex.encodeHex(bb_field.array()))); // dbgLog.finer("raw bytes in Hex:"+ // OBSTypeHexValue.get(RecordType7SubType4Fields.get(i))); if (unitLength == 8) { double fieldData = bb_field.getDouble(); /// OBSTypeValue.put(RecordType7SubType4Fields.get(i), fieldData); dbgLog.finer("fieldData(double)=" + fieldData); OBSTypeHexValue.put(RecordType7SubType4Fields.get(i), Double.toHexString(fieldData)); dbgLog.fine("fieldData in Hex=" + Double.toHexString(fieldData)); } } /// dbgLog.fine("OBSTypeValue="+OBSTypeValue); /// dbgLog.fine("OBSTypeHexValue="+OBSTypeHexValue); } else { // throw new IOException } dbgLog.fine("***** end of subType 4 ***** \n"); break; case 5: // Variable Sets Information parseRT7SubTypefield(stream); break; case 6: // Trends date information parseRT7SubTypefield(stream); break; case 7: // Multiple response groups parseRT7SubTypefield(stream); break; case 8: // Windows Data Entry data parseRT7SubTypefield(stream); break; case 9: // parseRT7SubTypefield(stream); break; case 10: // TextSmart data parseRT7SubTypefield(stream); break; case 11: // Msmt level, col width, & alignment //parseRT7SubTypefield(stream); headerSection = parseRT7SubTypefieldHeader(stream); if (headerSection != null) { int unitLength = headerSection[0]; int numberOfUnits = headerSection[1]; for (int i = 0; i < numberOfUnits; i++) { dbgLog.finer(i + "-th fieldData"); byte[] work = new byte[unitLength]; int nb = stream.read(work); dbgLog.finer("raw bytes in Hex:" + new String(Hex.encodeHex(work))); ByteBuffer bb_field = ByteBuffer.wrap(work); 
if (isLittleEndian) { bb_field.order(ByteOrder.LITTLE_ENDIAN); } dbgLog.finer("raw bytes in Hex:" + new String(Hex.encodeHex(bb_field.array()))); if (unitLength == 4) { int fieldData = bb_field.getInt(); dbgLog.finer("fieldData(int)=" + fieldData); dbgLog.finer("fieldData in Hex=0x" + Integer.toHexString(fieldData)); int remainder = i % 3; dbgLog.finer("remainder=" + remainder); if (remainder == 0) { /// measurementLevel.add(fieldData); } else if (remainder == 1) { /// columnWidth.add(fieldData); } else if (remainder == 2) { /// alignment.add(fieldData); } } } } else { // throw new IOException } /// dbgLog.fine("measurementLevel="+measurementLevel); /// dbgLog.fine("columnWidth="+columnWidth); /// dbgLog.fine("alignment="+alignment); dbgLog.fine("***** end of subType 11 ***** \n"); break; case 12: // Windows Data Entry GUID parseRT7SubTypefield(stream); break; case 13: // Extended variable names // parseRT7SubTypefield(stream); headerSection = parseRT7SubTypefieldHeader(stream); if (headerSection != null) { int unitLength = headerSection[0]; dbgLog.fine("RT7: unitLength=" + unitLength); int numberOfUnits = headerSection[1]; dbgLog.fine("RT7: numberOfUnits=" + numberOfUnits); byte[] work = new byte[unitLength * numberOfUnits]; int nbtyes13 = stream.read(work); String[] variableShortLongNamePairs = new String(work, "US-ASCII").split("\t"); for (int i = 0; i < variableShortLongNamePairs.length; i++) { dbgLog.fine("RT7: " + i + "-th pair" + variableShortLongNamePairs[i]); String[] pair = variableShortLongNamePairs[i].split("="); shortToLongVarialbeNameTable.put(pair[0], pair[1]); } dbgLog.fine("RT7: shortToLongVarialbeNameTable" + shortToLongVarialbeNameTable); smd.setShortToLongVarialbeNameTable(shortToLongVarialbeNameTable); } else { // throw new IOException } break; case 14: // Extended strings //parseRT7SubTypefield(stream); headerSection = parseRT7SubTypefieldHeader(stream); if (headerSection != null) { int unitLength = headerSection[0]; dbgLog.fine("RT7.14: 
unitLength=" + unitLength); int numberOfUnits = headerSection[1]; dbgLog.fine("RT7.14: numberOfUnits=" + numberOfUnits); byte[] work = new byte[unitLength * numberOfUnits]; int nbtyes13 = stream.read(work); String[] extendedVariablesSizePairs = new String(work, defaultCharSet).split("\000\t"); for (int i = 0; i < extendedVariablesSizePairs.length; i++) { dbgLog.fine("RT7.14: " + i + "-th pair" + extendedVariablesSizePairs[i]); if (extendedVariablesSizePairs[i].indexOf("=") > 0) { String[] pair = extendedVariablesSizePairs[i].split("="); extendedVariablesSizeTable.put(pair[0], Integer.valueOf(pair[1])); } } dbgLog.fine("RT7.14: extendedVariablesSizeTable" + extendedVariablesSizeTable); } else { // throw new IOException } break; case 15: // Clementine Metadata parseRT7SubTypefield(stream); break; case 16: // 64 bit N of cases parseRT7SubTypefield(stream); break; case 17: // File level attributes parseRT7SubTypefield(stream); break; case 18: // Variable attributes parseRT7SubTypefield(stream); break; case 19: // Extended multiple response groups parseRT7SubTypefield(stream); break; case 20: // Encoding, aka code page parseRT7SubTypefield(stream); /* TODO: This needs to be researched; * Is this field really used, ever? 
headerSection = parseRT7SubTypefieldHeader(stream); if (headerSection != null){ int unitLength = headerSection[0]; dbgLog.fine("RT7-20: unitLength="+unitLength); int numberOfUnits = headerSection[1]; dbgLog.fine("RT7-20: numberOfUnits="+numberOfUnits); byte[] rt7st20bytes = new byte[unitLength*numberOfUnits]; int nbytes20 = stream.read(rt7st20bytes); String dataCharSet = new String(rt7st20bytes,"US-ASCII"); if (dataCharSet != null && !(dataCharSet.equals(""))) { dbgLog.fine("RT7-20: data charset: "+ dataCharSet); defaultCharSet = dataCharSet; } } else { // throw new IOException } * */ break; case 21: // Value labels for long strings parseRT7SubTypefield(stream); break; case 22: // Missing values for long strings parseRT7SubTypefield(stream); break; default: parseRT7SubTypefield(stream); } } catch (IOException ex) { //ex.printStackTrace(); throw ex; } counter++; if (counter > 20) { break; } } dbgLog.fine("RT7: counter=" + counter); dbgLog.fine("RT7: ***** decodeRecordType7(): end *****"); }
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SAVFileReader.java
void decodeRecordType7(BufferedInputStream stream) throws IOException { dbgLog.fine("decodeRecordType7(): start"); int counter = 0; int[] headerSection = new int[2]; // the variables below may no longer needed; // but they may be useful for debugging/logging purposes. /// // RecordType 7 /// // Subtype 3 /// List<Integer> releaseMachineSpecificInfo = new ArrayList<Integer>(); /// List<String> releaseMachineSpecificInfoHex = new ArrayList<String>(); /// // Subytpe 4 /// Map<String, Double> OBSTypeValue = new LinkedHashMap<String, Double>(); /// Map<String, String> OBSTypeHexValue = new LinkedHashMap<String, String>(); //Subtype 11/*w w w.j av a 2 s .c o m*/ /// List<Integer> measurementLevel = new ArrayList<Integer>(); /// List<Integer> columnWidth = new ArrayList<Integer>(); /// List<Integer> alignment = new ArrayList<Integer>(); while (true) { try { if (stream == null) { throw new IllegalArgumentException("RT7: stream == null!"); } // first check the 4-byte header value //if (stream.markSupported()){ stream.mark(1000); //} // 7.0 check the first 4 bytes byte[] headerCodeRt7 = new byte[LENGTH_RECORD_TYPE7_CODE]; int nbytes_rt7 = stream.read(headerCodeRt7, 0, LENGTH_RECORD_TYPE7_CODE); // to-do check against nbytes //printHexDump(headerCodeRt7, "RT7 header test"); ByteBuffer bb_header_code_rt7 = ByteBuffer.wrap(headerCodeRt7, 0, LENGTH_RECORD_TYPE7_CODE); if (isLittleEndian) { bb_header_code_rt7.order(ByteOrder.LITTLE_ENDIAN); } int intRT7test = bb_header_code_rt7.getInt(); dbgLog.fine("RT7: header test value=" + intRT7test); if (intRT7test != 7) { //if (stream.markSupported()){ //out.print("iteration="+safteyCounter); //dbgLog.fine("iteration="+safteyCounter); dbgLog.fine("intRT7test failed=" + intRT7test); dbgLog.fine("counter=" + counter); stream.reset(); return; //} } // 7.1 check 4-byte integer Sub-Type Code byte[] length_sub_type_code = new byte[LENGTH_RT7_SUB_TYPE_CODE]; int nbytes_rt7_1 = stream.read(length_sub_type_code, 0, LENGTH_RT7_SUB_TYPE_CODE); // 
to-do check against nbytes //printHexDump(length_how_many_line_bytes, "RT7 how_many_line_bytes"); ByteBuffer bb_sub_type_code = ByteBuffer.wrap(length_sub_type_code, 0, LENGTH_RT7_SUB_TYPE_CODE); if (isLittleEndian) { bb_sub_type_code.order(ByteOrder.LITTLE_ENDIAN); } int subTypeCode = bb_sub_type_code.getInt(); dbgLog.fine("RT7: subTypeCode=" + subTypeCode); switch (subTypeCode) { case 3: // 3: Release andMachine-Specific Integer Information //parseRT7SubTypefield(stream); headerSection = parseRT7SubTypefieldHeader(stream); if (headerSection != null) { int unitLength = headerSection[0]; int numberOfUnits = headerSection[1]; for (int i = 0; i < numberOfUnits; i++) { dbgLog.finer(i + "-th fieldData"); byte[] work = new byte[unitLength]; int nb = stream.read(work); dbgLog.finer("raw bytes in Hex:" + new String(Hex.encodeHex(work))); ByteBuffer bb_field = ByteBuffer.wrap(work); if (isLittleEndian) { bb_field.order(ByteOrder.LITTLE_ENDIAN); } String dataInHex = new String(Hex.encodeHex(bb_field.array())); /// releaseMachineSpecificInfoHex.add(dataInHex); dbgLog.finer("raw bytes in Hex:" + dataInHex); if (unitLength == 4) { int fieldData = bb_field.getInt(); dbgLog.finer("fieldData(int)=" + fieldData); dbgLog.finer("fieldData in Hex=0x" + Integer.toHexString(fieldData)); /// releaseMachineSpecificInfo.add(fieldData); } } /// dbgLog.fine("releaseMachineSpecificInfo="+releaseMachineSpecificInfo); /// dbgLog.fine("releaseMachineSpecificInfoHex="+releaseMachineSpecificInfoHex); } else { // throw new IOException } dbgLog.fine("***** end of subType 3 ***** \n"); break; case 4: // Release andMachine-SpecificOBS-Type Information headerSection = parseRT7SubTypefieldHeader(stream); if (headerSection != null) { int unitLength = headerSection[0]; int numberOfUnits = headerSection[1]; for (int i = 0; i < numberOfUnits; i++) { dbgLog.finer(i + "-th fieldData:" + RecordType7SubType4Fields.get(i)); byte[] work = new byte[unitLength]; int nb = stream.read(work); dbgLog.finer("raw bytes 
in Hex:" + new String(Hex.encodeHex(work))); ByteBuffer bb_field = ByteBuffer.wrap(work); dbgLog.finer("byte order=" + bb_field.order().toString()); if (isLittleEndian) { bb_field.order(ByteOrder.LITTLE_ENDIAN); } ByteBuffer bb_field_dup = bb_field.duplicate(); OBSTypeHexValue.put(RecordType7SubType4Fields.get(i), new String(Hex.encodeHex(bb_field.array()))); // dbgLog.finer("raw bytes in Hex:"+ // OBSTypeHexValue.get(RecordType7SubType4Fields.get(i))); if (unitLength == 8) { double fieldData = bb_field.getDouble(); /// OBSTypeValue.put(RecordType7SubType4Fields.get(i), fieldData); dbgLog.finer("fieldData(double)=" + fieldData); OBSTypeHexValue.put(RecordType7SubType4Fields.get(i), Double.toHexString(fieldData)); dbgLog.fine("fieldData in Hex=" + Double.toHexString(fieldData)); } } /// dbgLog.fine("OBSTypeValue="+OBSTypeValue); /// dbgLog.fine("OBSTypeHexValue="+OBSTypeHexValue); } else { // throw new IOException } dbgLog.fine("***** end of subType 4 ***** \n"); break; case 5: // Variable Sets Information parseRT7SubTypefield(stream); break; case 6: // Trends date information parseRT7SubTypefield(stream); break; case 7: // Multiple response groups parseRT7SubTypefield(stream); break; case 8: // Windows Data Entry data parseRT7SubTypefield(stream); break; case 9: // parseRT7SubTypefield(stream); break; case 10: // TextSmart data parseRT7SubTypefield(stream); break; case 11: // Msmt level, col width, & alignment //parseRT7SubTypefield(stream); headerSection = parseRT7SubTypefieldHeader(stream); if (headerSection != null) { int unitLength = headerSection[0]; int numberOfUnits = headerSection[1]; for (int i = 0; i < numberOfUnits; i++) { dbgLog.finer(i + "-th fieldData"); byte[] work = new byte[unitLength]; int nb = stream.read(work); dbgLog.finer("raw bytes in Hex:" + new String(Hex.encodeHex(work))); ByteBuffer bb_field = ByteBuffer.wrap(work); if (isLittleEndian) { bb_field.order(ByteOrder.LITTLE_ENDIAN); } dbgLog.finer("raw bytes in Hex:" + new 
String(Hex.encodeHex(bb_field.array()))); if (unitLength == 4) { int fieldData = bb_field.getInt(); dbgLog.finer("fieldData(int)=" + fieldData); dbgLog.finer("fieldData in Hex=0x" + Integer.toHexString(fieldData)); int remainder = i % 3; dbgLog.finer("remainder=" + remainder); if (remainder == 0) { /// measurementLevel.add(fieldData); } else if (remainder == 1) { /// columnWidth.add(fieldData); } else if (remainder == 2) { /// alignment.add(fieldData); } } } } else { // throw new IOException } /// dbgLog.fine("measurementLevel="+measurementLevel); /// dbgLog.fine("columnWidth="+columnWidth); /// dbgLog.fine("alignment="+alignment); dbgLog.fine("end of subType 11\n"); break; case 12: // Windows Data Entry GUID parseRT7SubTypefield(stream); break; case 13: // Extended variable names // parseRT7SubTypefield(stream); headerSection = parseRT7SubTypefieldHeader(stream); if (headerSection != null) { int unitLength = headerSection[0]; dbgLog.fine("RT7: unitLength=" + unitLength); int numberOfUnits = headerSection[1]; dbgLog.fine("RT7: numberOfUnits=" + numberOfUnits); byte[] work = new byte[unitLength * numberOfUnits]; int nbtyes13 = stream.read(work); String[] variableShortLongNamePairs = new String(work, "US-ASCII").split("\t"); for (int i = 0; i < variableShortLongNamePairs.length; i++) { dbgLog.fine("RT7: " + i + "-th pair" + variableShortLongNamePairs[i]); String[] pair = variableShortLongNamePairs[i].split("="); shortToLongVariableNameTable.put(pair[0], pair[1]); } dbgLog.fine("RT7: shortToLongVarialbeNameTable" + shortToLongVariableNameTable); // We are saving the short-to-long name map; at the // end of ingest, we'll go through the data variables and // change the names accordingly. 
// smd.setShortToLongVarialbeNameTable(shortToLongVarialbeNameTable); } else { // throw new IOException } break; case 14: // Extended strings //parseRT7SubTypefield(stream); headerSection = parseRT7SubTypefieldHeader(stream); if (headerSection != null) { int unitLength = headerSection[0]; dbgLog.fine("RT7.14: unitLength=" + unitLength); int numberOfUnits = headerSection[1]; dbgLog.fine("RT7.14: numberOfUnits=" + numberOfUnits); byte[] work = new byte[unitLength * numberOfUnits]; int nbtyes13 = stream.read(work); String[] extendedVariablesSizePairs = new String(work, defaultCharSet).split("\000\t"); for (int i = 0; i < extendedVariablesSizePairs.length; i++) { dbgLog.fine("RT7.14: " + i + "-th pair" + extendedVariablesSizePairs[i]); if (extendedVariablesSizePairs[i].indexOf("=") > 0) { String[] pair = extendedVariablesSizePairs[i].split("="); extendedVariablesSizeTable.put(pair[0], Integer.valueOf(pair[1])); } } dbgLog.fine("RT7.14: extendedVariablesSizeTable" + extendedVariablesSizeTable); } else { // throw new IOException } break; case 15: // Clementine Metadata parseRT7SubTypefield(stream); break; case 16: // 64 bit N of cases parseRT7SubTypefield(stream); break; case 17: // File level attributes parseRT7SubTypefield(stream); break; case 18: // Variable attributes parseRT7SubTypefield(stream); break; case 19: // Extended multiple response groups parseRT7SubTypefield(stream); break; case 20: // Character encoding, aka code page. // Must be a version 16+ feature (?). // Starting v.16, the default character encoding for SAV // files is UTF-8; but then it is possible to specify an // alternative encoding here. // A typical use case would be people setting it to "ISO-Latin" // or "windows-1252", or a similar 8-bit encoding to store // text with standard Western European accents. // -- L.A. 
headerSection = parseRT7SubTypefieldHeader(stream); if (headerSection != null) { int unitLength = headerSection[0]; dbgLog.fine("RT7-20: unitLength=" + unitLength); int numberOfUnits = headerSection[1]; dbgLog.fine("RT7-20: numberOfUnits=" + numberOfUnits); byte[] rt7st20bytes = new byte[unitLength * numberOfUnits]; int nbytes20 = stream.read(rt7st20bytes); String dataCharSet = new String(rt7st20bytes, "US-ASCII"); if (dataCharSet != null && !(dataCharSet.equals(""))) { dbgLog.fine("RT7-20: data charset: " + dataCharSet); defaultCharSet = dataCharSet; } } /*else { // TODO: // decide if the exception should actually be thrown here! // -- L.A. 4.0 beta // throw new IOException }*/ break; case 21: // Value labels for long strings parseRT7SubTypefield(stream); break; case 22: // Missing values for long strings parseRT7SubTypefield(stream); break; default: parseRT7SubTypefield(stream); } } catch (IOException ex) { //ex.printStackTrace(); throw ex; } counter++; if (counter > 20) { break; } } dbgLog.fine("RT7: counter=" + counter); dbgLog.fine("RT7: decodeRecordType7(): end"); }