List of usage examples for org.apache.commons.lang StringUtils stripEnd
public static String stripEnd(String str, String stripChars)
Strips any of a set of characters from the end of a String.
From source file:edu.ku.brc.specify.web.SpecifyExplorer.java
/** * @param data/*from w ww . ja va 2 s . c o m*/ * @return */ protected String formatValue(final Object data) { if (data instanceof FormDataObjIFace) { return formatFDI((FormDataObjIFace) data); } else if (data instanceof String || data instanceof Integer) { return data.toString(); } else if (data instanceof Date) { return dateFormatter.format((Date) data); } else if (data instanceof Calendar) { return dateFormatter.format(((Calendar) data).getTime()); } else if (data instanceof Float) { return String.format("%5.2f", data); } else if (data instanceof Double) { return String.format("%5.2f", data); } else if (data instanceof BigDecimal) { return StringUtils.stripEnd(data.toString(), "0"); } return data.toString(); }
From source file:com.mxhero.plugin.cloudstorage.onedrive.api.Items.java
/**
 * Cleans and encodes a path, truncates it to the configured maximum length and removes any
 * trailing dots from the result.
 *
 * <p>The maximum length is read from {@code ApiEnviroment.maxFileAndFolderLenght} and parsed
 * as an integer.
 *
 * @param reservedCharactersType the reserved characters type passed on to
 *        {@code cleanAndEncodePath}
 * @param path the path to process; may be {@code null}
 * @return the processed path, or {@code null} when {@code path} is {@code null}
 */
public static String cleanAndEncodeAndShortPath(ReservedCharactersType reservedCharactersType, String path) {
    if (path == null) {
        return null;
    }

    final String encoded = cleanAndEncodePath(reservedCharactersType, path);
    final int maxLength = Integer.parseInt(ApiEnviroment.maxFileAndFolderLenght.getValue());
    final String shortened = encoded.length() > maxLength ? encoded.substring(0, maxLength) : encoded;

    return StringUtils.stripEnd(shortened, ".");
}
From source file:com.gemstone.gemfire.management.internal.cli.GfshParser.java
/**
 * Parses a line of user input into a {@code GfshParseResult}.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Trailing characters are stripped with {@code StringUtils.stripEnd(userInput, null)}
 *       and, if the input ends with {@code SyntaxConstants.COMMAND_DELIMITER}, that
 *       delimiter is removed.</li>
 *   <li>Candidate commands are looked up with {@code locateTargets}.</li>
 *   <li>More than one candidate and non-empty input: {@code handleCondition} is called with
 *       {@code COMMAND_NAME_AMBIGUOUS}.</li>
 *   <li>Exactly one candidate: its option parser is run on the remaining buffer, arguments
 *       and options are processed, and when no error was flagged a {@code GfshParseResult}
 *       is built from the collected method parameters.</li>
 *   <li>No candidate: {@code handleCondition} is called with
 *       {@code COMMAND_INVALID_OR_UNAVAILABLE}; the message names the availability reason
 *       when {@code locateExactMatchingTarget} finds the command.</li>
 * </ol>
 *
 * <p>{@code IllegalArgumentException}, {@code IllegalAccessException} and
 * {@code InvocationTargetException} are logged as warnings and not rethrown.
 *
 * <p>NOTE(review): a {@code CliException} that is not a {@code CliCommandOptionException} is
 * silently ignored, leaving {@code parse} as {@code null} for the calls that follow
 * (confirm those helpers tolerate {@code null}).
 *
 * @param userInput the raw command line text
 * @return the parse result, or {@code null} when none was built in this method
 */// w w w .j a v a 2 s .c o m public ParseResult parse(String userInput) { GfshParseResult parseResult = null; // First remove the trailing white spaces userInput = StringUtils.stripEnd(userInput, null); if ((ParserUtils.contains(userInput, SyntaxConstants.COMMAND_DELIMITER) && StringUtils.endsWithIgnoreCase(userInput, SyntaxConstants.COMMAND_DELIMITER))) { userInput = StringUtils.removeEnd(userInput, SyntaxConstants.COMMAND_DELIMITER); } try { boolean error = false; CliCommandOptionException coe = null; List<CommandTarget> targets = locateTargets(ParserUtils.trimBeginning(userInput), false); if (targets.size() > 1) { if (userInput.length() > 0) { handleCondition(CliStrings.format( CliStrings.GFSHPARSER__MSG__AMBIGIOUS_COMMAND_0_FOR_ASSISTANCE_USE_1_OR_HINT_HELP, new Object[] { userInput, AbstractShell.completionKeys }), CommandProcessingException.COMMAND_NAME_AMBIGUOUS, userInput); } } else { if (targets.size() == 1) { OptionSet parse = null; List<MethodParameter> parameters = new ArrayList<MethodParameter>(); Map<String, String> paramValMap = new HashMap<String, String>(); CommandTarget commandTarget = targets.get(0); GfshMethodTarget gfshMethodTarget = commandTarget.getGfshMethodTarget(); preConfigureConverters(commandTarget); try { parse = commandTarget.getOptionParser().parse(gfshMethodTarget.getRemainingBuffer()); } catch (CliException ce) { if (ce instanceof CliCommandOptionException) { coe = (CliCommandOptionException) ce; coe.setCommandTarget(commandTarget); parse = coe.getOptionSet(); error = true; } } try { checkOptionSetForValidCommandModes(parse, commandTarget); } catch (CliCommandMultiModeOptionException ce) { error = true; coe = ce; } error = processArguments(parse, commandTarget, paramValMap, parameters, error); error = processOptions(parse, commandTarget, paramValMap, parameters, error); if (!error) { Object[] methodParameters = new Object[parameters.size()]; for (MethodParameter parameter : parameters) { 
methodParameters[parameter.getParameterNo()] = parameter.getParameter(); } parseResult = new GfshParseResult(gfshMethodTarget.getMethod(), gfshMethodTarget.getTarget(), methodParameters, userInput, commandTarget.getCommandName(), paramValMap); } else { if (coe != null) { logWrapper.fine("Handling exception: " + coe.getMessage()); ExceptionHandler.handleException(coe); // ExceptionHandler.handleException() only logs it on console. // When on member, we need to handle this. if (!CliUtil.isGfshVM()) { handleCondition( CliStrings.format(CliStrings.GFSHPARSER__MSG__INVALID_COMMAND_STRING_0, userInput), coe, CommandProcessingException.COMMAND_INVALID, userInput); } } } } else { String message = CliStrings.format(CliStrings.GFSHPARSER__MSG__COMMAND_0_IS_NOT_VALID, userInput); CommandTarget commandTarget = locateExactMatchingTarget(userInput); if (commandTarget != null) { String commandName = commandTarget.getCommandName(); AvailabilityTarget availabilityIndicator = commandTarget.getAvailabilityIndicator(); message = CliStrings.format(CliStrings.GFSHPARSER__MSG__0_IS_NOT_AVAILABLE_REASON_1, new Object[] { commandName, availabilityIndicator.getAvailabilityDescription() }); } handleCondition(message, CommandProcessingException.COMMAND_INVALID_OR_UNAVAILABLE, userInput); } } } catch (IllegalArgumentException e1) { logWrapper.warning(CliUtil.stackTraceAsString(e1)); } catch (IllegalAccessException e1) { logWrapper.warning(CliUtil.stackTraceAsString(e1)); } catch (InvocationTargetException e1) { logWrapper.warning(CliUtil.stackTraceAsString(e1)); } return parseResult; }
From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.sav.SAVFileReader.java
/**
 * Decodes the "Record Type 2" (variable description) units of a SAV file from the stream.
 *
 * <p>The loop runs up to {@code OBSUnitsPerCase} times. Each iteration reads a fixed segment
 * of {@code LENGTH_RECORDTYPE2_FIXED} bytes and decodes its first four integers, honouring
 * {@code isLittleEndian}:
 * <ul>
 *   <li>[0] must be 2; any other value ends the loop early.</li>
 *   <li>[1] is the variable type: -1 marks a continuation unit of an earlier string
 *       variable (recorded and skipped), 0 a numeric variable, and a positive value a
 *       string variable. A string variable following a 255-byte one may be treated as a
 *       chunk of an extended string, as decided by {@code variableNameIsAnIncrement}.</li>
 *   <li>[2] says whether a variable label follows (0 or 1; anything else raises an
 *       {@code IOException}). The label bytes are always consumed but only stored for
 *       non-extended variables.</li>
 *   <li>[3] is the missing-value type code; when it is non-zero and contained in
 *       {@code validMissingValueCodeSet}, the missing-value units are read and decoded as
 *       doubles (numeric variables) or space-stripped strings (string variables).</li>
 * </ul>
 * The print/write format bytes are taken from the same fixed segment, and the variable name
 * from its bytes 24 onward.
 *
 * <p>Results are accumulated in locals and in collections not declared in this method
 * (e.g. {@code OBSwiseTypelList}, {@code variableTypelList}, {@code variableNameList},
 * {@code OBSIndexToVariableName}, {@code printFormatTable}, {@code invalidDataTable}). When
 * the loop completes all {@code OBSUnitsPerCase} iterations they are published to
 * {@code smd} and {@code varQnty}; otherwise an {@code IOException} is thrown.
 *
 * <p>NOTE(review): every {@code stream.read(...)} result is only compared with 0, so an
 * end-of-stream (-1) or a short read is not detected here.
 * <p>NOTE(review): the {@code catch (Exception ex)} branch prints the stack trace and lets
 * the loop continue with the next unit.
 *
 * @param stream the SAV input positioned at the first Record Type 2 unit
 * @throws IllegalArgumentException if {@code stream} is {@code null}
 * @throws IOException on a read failure, an invalid label flag, an unknown format code, or
 *         when the loop ends before {@code OBSUnitsPerCase} units were processed
 */
void decodeRecordType2(BufferedInputStream stream) throws IOException { dbgLog.fine("***** decodeRecordType2(): start *****"); if (stream == null) { throw new IllegalArgumentException("stream == null!"); }// w w w . j a v a 2s. c o m Map<String, String> variableLabelMap = new LinkedHashMap<String, String>(); Map<String, List<String>> missingValueTable = new LinkedHashMap<String, List<String>>(); List<Integer> printFormatList = new ArrayList<Integer>(); String caseWeightVariableName = null; int caseWeightVariableIndex = 0; boolean lastVariableIsExtendable = false; boolean extendedVariableMode = false; boolean obs255 = false; String lastVariableName = null; String lastExtendedVariable = null; // this field repeats as many as the number of variables in // this sav file // (note that the above statement is not technically correct, this // record repeats not just for every variable in the file, but for // every OBS (8 byte unit); i.e., if a string is split into multiple // OBS units, each one will have its own RT2 record -- L.A.). // Each field constists of a fixed (32-byte) segment and // then a few variable segments: // if the variable has a label (3rd INT4 set to 1), then there's 4 more // bytes specifying the length of the label, and then that many bytes // holding the label itself (no more than 256). // Then if there are optional missing value units (4th INT4 set to 1) // there will be 3 more OBS units attached = 24 extra bytes. 
int variableCounter = 0; int obsSeqNumber = 0; int j; dbgLog.fine("RT2: Reading " + OBSUnitsPerCase + " OBS units."); for (j = 0; j < OBSUnitsPerCase; j++) { dbgLog.fine("RT2: \n\n+++++++++++ " + j + "-th RT2 unit is to be decoded +++++++++++"); // 2.0: read the fixed[=non-optional] 32-byte segment byte[] recordType2Fixed = new byte[LENGTH_RECORDTYPE2_FIXED]; try { int nbytes = stream.read(recordType2Fixed, 0, LENGTH_RECORDTYPE2_FIXED); //printHexDump(recordType2Fixed, "recordType2 part 1"); if (nbytes == 0) { throw new IOException("reading recordType2: no bytes read!"); } int offset = 0; // 2.1: create int-view of the bytebuffer for the first 16-byte segment int rt2_1st_4_units = 4; ByteBuffer[] bb_record_type2_fixed_part1 = new ByteBuffer[rt2_1st_4_units]; int[] recordType2FixedPart1 = new int[rt2_1st_4_units]; for (int i = 0; i < rt2_1st_4_units; i++) { bb_record_type2_fixed_part1[i] = ByteBuffer.wrap(recordType2Fixed, offset, LENGTH_SAV_INT_BLOCK); offset += LENGTH_SAV_INT_BLOCK; if (isLittleEndian) { bb_record_type2_fixed_part1[i].order(ByteOrder.LITTLE_ENDIAN); } recordType2FixedPart1[i] = bb_record_type2_fixed_part1[i].getInt(); } dbgLog.fine("recordType2FixedPart=" + ReflectionToStringBuilder.toString(recordType2FixedPart1, ToStringStyle.MULTI_LINE_STYLE)); // 1st ([0]) element must be 2 otherwise no longer Record Type 2 if (recordType2FixedPart1[0] != 2) { dbgLog.info(j + "-th RT header value is no longet RT2! " + recordType2FixedPart1[0]); break; //throw new IOException("RT2 reading error: The current position is no longer Record Type 2"); } dbgLog.fine("variable type[must be 2]=" + recordType2FixedPart1[0]); // 2.3 variable name: 8 byte(space[x20]-padded) // This field is located at the very end of the 32 byte // fixed-size RT2 header (bytes 24-31). 
// We are processing it now, so that // we can make the decision on whether this variable is part // of a compound variable: String RawVariableName = new String( Arrays.copyOfRange(recordType2Fixed, 24, (24 + LENGTH_VARIABLE_NAME)), defaultCharSet); //offset +=LENGTH_VARIABLE_NAME; String variableName = null; if (RawVariableName.indexOf(' ') >= 0) { variableName = RawVariableName.substring(0, RawVariableName.indexOf(' ')); } else { variableName = RawVariableName; } // 2nd ([1]) element: numeric variable = 0 :for string variable // this block indicates its datum-length, i.e, >0 ; // if -1, this RT2 unit is a non-1st RT2 unit for a string variable // whose value is longer than 8 character. boolean isNumericVariable = false; dbgLog.fine("variable type(0: numeric; > 0: String;-1 continue )=" + recordType2FixedPart1[1]); //OBSwiseTypelList.add(recordType2FixedPart1[1]); int HowManyRt2Units = 1; if (recordType2FixedPart1[1] == -1) { dbgLog.fine("this RT2 is an 8 bit continuation chunk of an earlier string variable"); if (obs255) { if (obsSeqNumber < 30) { OBSwiseTypelList.add(recordType2FixedPart1[1]); obsSeqNumber++; } else { OBSwiseTypelList.add(-2); obs255 = false; obsSeqNumber = 0; } } else { OBSwiseTypelList.add(recordType2FixedPart1[1]); } obsNonVariableBlockSet.add(j); continue; } else if (recordType2FixedPart1[1] == 0) { // This is a numeric variable extendedVariableMode = false; // And as such, it cannot be an extension of a // previous, long string variable. OBSwiseTypelList.add(recordType2FixedPart1[1]); variableCounter++; isNumericVariable = true; variableTypelList.add(recordType2FixedPart1[1]); } else if (recordType2FixedPart1[1] > 0) { // This looks like a regular string variable. However, // it may still be a part of a compound variable // (a String > 255 bytes that was split into 255 byte // chunks, stored as individual String variables). 
if (recordType2FixedPart1[1] == 255) { obs255 = true; } if (lastVariableIsExtendable) { String varNameBase = null; if (lastVariableName.length() > 5) { varNameBase = lastVariableName.substring(0, 5); } else { varNameBase = lastVariableName; } if (extendedVariableMode) { if (variableNameIsAnIncrement(varNameBase, lastExtendedVariable, variableName)) { OBSwiseTypelList.add(-1); lastExtendedVariable = variableName; // OK, we stay in the "extended variable" mode; // but we can't move on to the next OBS (hence the commented out // "continue" below: //continue; // see the next comment below for the explanation. // // Should we also set "extendable" flag to false at this point // if it's shorter than 255 bytes, i.e. the last extended chunk? } else { extendedVariableMode = false; } } else { if (variableNameIsAnIncrement(varNameBase, variableName)) { OBSwiseTypelList.add(-1); extendedVariableMode = true; dbgLog.fine("RT2: in extended variable mode; variable " + variableName); lastExtendedVariable = variableName; // Before we move on to the next OBS unit, we need to check // if this current extended variable has its own label specified; // If so, we need to determine its length, then read and skip // that many bytes. // Hence the commented out "continue" below: //continue; } } } if (!extendedVariableMode) { // OK, this is a "real" // string variable, and not a continuation chunk of a compound // string. OBSwiseTypelList.add(recordType2FixedPart1[1]); variableCounter++; if (recordType2FixedPart1[1] == 255) { // This variable is 255 bytes long, i.e. this is // either the single "atomic" variable of the // max allowed size, or it's a 255 byte segment // of a compound variable. So we will check // the next variable and see if it is the continuation // of this one. 
lastVariableIsExtendable = true; } else { lastVariableIsExtendable = false; } if (recordType2FixedPart1[1] % LENGTH_SAV_OBS_BLOCK == 0) { HowManyRt2Units = recordType2FixedPart1[1] / LENGTH_SAV_OBS_BLOCK; } else { HowManyRt2Units = recordType2FixedPart1[1] / LENGTH_SAV_OBS_BLOCK + 1; } variableTypelList.add(recordType2FixedPart1[1]); } } if (!extendedVariableMode) { // Again, we only want to do the following steps for the "real" // variables, not the chunks of split mega-variables: dbgLog.fine("RT2: HowManyRt2Units for this variable=" + HowManyRt2Units); lastVariableName = variableName; // caseWeightVariableOBSIndex starts from 1: 0 is used for does-not-exist cases if (j == (caseWeightVariableOBSIndex - 1)) { caseWeightVariableName = variableName; caseWeightVariableIndex = variableCounter; smd.setCaseWeightVariableName(caseWeightVariableName); smd.getFileInformation().put("caseWeightVariableIndex", caseWeightVariableIndex); } OBSIndexToVariableName.put(j, variableName); //dbgLog.fine("\nvariable name="+variableName+"<-"); dbgLog.fine("RT2: " + j + "-th variable name=" + variableName + "<-"); dbgLog.fine("RT2: raw variable: " + RawVariableName); variableNameList.add(variableName); } // 3rd ([2]) element: = 1 variable-label block follows; 0 = no label // dbgLog.fine("RT: variable label follows?(1:yes; 0: no)=" + recordType2FixedPart1[2]); boolean hasVariableLabel = recordType2FixedPart1[2] == 1 ? 
true : false; if ((recordType2FixedPart1[2] != 0) && (recordType2FixedPart1[2] != 1)) { throw new IOException("RT2: reading error: value is neither 0 or 1" + recordType2FixedPart1[2]); } // 2.4 [optional]The length of a variable label followed: 4-byte int // 3rd element of 2.1 indicates whether this field exists // *** warning: The label block is padded to a multiple of the 4-byte // NOT the raw integer value of this 4-byte block if (hasVariableLabel) { byte[] length_variable_label = new byte[4]; int nbytes_2_4 = stream.read(length_variable_label); if (nbytes_2_4 == 0) { throw new IOException("RT 2: error reading recordType2.4: no bytes read!"); } else { dbgLog.fine("nbytes_2_4=" + nbytes_2_4); } ByteBuffer bb_length_variable_label = ByteBuffer.wrap(length_variable_label, 0, LENGTH_VARIABLE_LABEL); if (isLittleEndian) { bb_length_variable_label.order(ByteOrder.LITTLE_ENDIAN); } int rawVariableLabelLength = bb_length_variable_label.getInt(); dbgLog.fine("rawVariableLabelLength=" + rawVariableLabelLength); int variableLabelLength = getSAVintAdjustedBlockLength(rawVariableLabelLength); dbgLog.fine("RT2: variableLabelLength=" + variableLabelLength); // 2.5 [optional]variable label whose length is found at 2.4 String variableLabel = ""; if (rawVariableLabelLength > 0) { byte[] variable_label = new byte[variableLabelLength]; int nbytes_2_5 = stream.read(variable_label); if (nbytes_2_5 == 0) { throw new IOException("RT 2: error reading recordType2.5: " + variableLabelLength + " bytes requested, no bytes read!"); } else { dbgLog.fine("nbytes_2_5=" + nbytes_2_5); } variableLabel = new String(Arrays.copyOfRange(variable_label, 0, rawVariableLabelLength), defaultCharSet); dbgLog.fine("RT2: variableLabel=" + variableLabel + "<-"); dbgLog.info(variableName + " => " + variableLabel); } else { dbgLog.fine("RT2: defaulting to empty variable label."); } if (!extendedVariableMode) { // We only have any use for this label if it's a "real" variable. 
// Thinking about it, it doesn't make much sense for the "fake" // variables that are actually chunks of large strings to store // their own labels. But in some files they do. Then failing to read // the bytes would result in getting out of sync with the RT record // borders. So we always read the bytes, but only use them for // the real variable entries. /*String variableLabel = new String(Arrays.copyOfRange(variable_label, 0, rawVariableLabelLength),"US-ASCII");*/ variableLabelMap.put(variableName, variableLabel); } } if (extendedVariableMode) { // there's nothing else left for us to do in this iteration of the loop. // Once again, this was not a real variable, but a dummy variable entry // created for a chunk of a string variable longer than 255 bytes -- // that's how SPSS stores them. continue; } // 4th ([3]) element: Missing value type code // 0[none], 1, 2, 3 [point-type],-2[range], -3 [range type+ point] dbgLog.fine("RT: missing value unit follows?(if 0, none)=" + recordType2FixedPart1[3]); boolean hasMissingValues = (validMissingValueCodeSet.contains(recordType2FixedPart1[3]) && (recordType2FixedPart1[3] != 0)) ? true : false; InvalidData invalidDataInfo = null; if (recordType2FixedPart1[3] != 0) { invalidDataInfo = new InvalidData(recordType2FixedPart1[3]); dbgLog.fine("RT: missing value type=" + invalidDataInfo.getType()); } // 2.2: print/write formats: 4-byte each = 8 bytes byte[] printFormt = Arrays.copyOfRange(recordType2Fixed, offset, offset + LENGTH_PRINT_FORMAT_CODE); dbgLog.fine("printFrmt=" + new String(Hex.encodeHex(printFormt))); offset += LENGTH_PRINT_FORMAT_CODE; int formatCode = isLittleEndian ? printFormt[2] : printFormt[1]; int formatWidth = isLittleEndian ? printFormt[1] : printFormt[2]; int formatDecimalPointPosition = isLittleEndian ? 
printFormt[0] : printFormt[3]; dbgLog.fine("RT2: format code{5=F, 1=A[String]}=" + formatCode); formatDecimalPointPositionList.add(formatDecimalPointPosition); if (!SPSSConstants.FORMAT_CODE_TABLE_SAV.containsKey(formatCode)) { throw new IOException("Unknown format code was found = " + formatCode); } else { printFormatList.add(formatCode); } byte[] writeFormt = Arrays.copyOfRange(recordType2Fixed, offset, offset + LENGTH_WRITE_FORMAT_CODE); dbgLog.fine("RT2: writeFrmt=" + new String(Hex.encodeHex(writeFormt))); if (writeFormt[3] != 0x00) { dbgLog.fine("byte-order(write format): reversal required"); } offset += LENGTH_WRITE_FORMAT_CODE; if (!SPSSConstants.ORDINARY_FORMAT_CODE_SET.contains(formatCode)) { StringBuilder sb = new StringBuilder( SPSSConstants.FORMAT_CODE_TABLE_SAV.get(formatCode) + formatWidth); if (formatDecimalPointPosition > 0) { sb.append("." + formatDecimalPointPosition); } dbgLog.info("formattable[i] = " + variableName + " -> " + sb.toString()); printFormatNameTable.put(variableName, sb.toString()); } printFormatTable.put(variableName, SPSSConstants.FORMAT_CODE_TABLE_SAV.get(formatCode)); // 2.6 [optional] missing values:4-byte each if exists // 4th element of 2.1 indicates the structure of this sub-field // Should we perhaps check for this for the "fake" variables too? // if (hasMissingValues) { dbgLog.fine("RT2: decoding missing value: type=" + recordType2FixedPart1[3]); int howManyMissingValueUnits = missingValueCodeUnits.get(recordType2FixedPart1[3]); //int howManyMissingValueUnits = recordType2FixedPart1[3] > 0 ? 
recordType2FixedPart1[3] : 0; dbgLog.fine("RT2: howManyMissingValueUnits=" + howManyMissingValueUnits); byte[] missing_value_code_units = new byte[LENGTH_SAV_OBS_BLOCK * howManyMissingValueUnits]; int nbytes_2_6 = stream.read(missing_value_code_units); if (nbytes_2_6 == 0) { throw new IOException("RT 2: reading recordType2.6: no byte was read"); } else { dbgLog.fine("nbytes_2_6=" + nbytes_2_6); } //printHexDump(missing_value_code_units, "missing value"); if (isNumericVariable) { double[] missingValues = new double[howManyMissingValueUnits]; //List<String> mvp = new ArrayList<String>(); List<String> mv = new ArrayList<String>(); ByteBuffer[] bb_missig_value_code = new ByteBuffer[howManyMissingValueUnits]; int offset_start = 0; for (int i = 0; i < howManyMissingValueUnits; i++) { bb_missig_value_code[i] = ByteBuffer.wrap(missing_value_code_units, offset_start, LENGTH_SAV_OBS_BLOCK); offset_start += LENGTH_SAV_OBS_BLOCK; if (isLittleEndian) { bb_missig_value_code[i].order(ByteOrder.LITTLE_ENDIAN); } ByteBuffer temp = bb_missig_value_code[i].duplicate(); missingValues[i] = bb_missig_value_code[i].getDouble(); if (Double.toHexString(missingValues[i]).equals("-0x1.ffffffffffffep1023")) { dbgLog.fine("1st value is LOWEST"); mv.add(Double.toHexString(missingValues[i])); } else if (Double.valueOf(missingValues[i]).equals(Double.MAX_VALUE)) { dbgLog.fine("2nd value is HIGHEST"); mv.add(Double.toHexString(missingValues[i])); } else { mv.add(doubleNumberFormatter.format(missingValues[i])); } dbgLog.fine(i + "-th missing value=" + Double.toHexString(missingValues[i])); } dbgLog.fine("variableName=" + variableName); if (recordType2FixedPart1[3] > 0) { // point cases only dbgLog.fine("mv(>0)=" + mv); missingValueTable.put(variableName, mv); invalidDataInfo.setInvalidValues(mv); } else if (recordType2FixedPart1[3] == -2) { dbgLog.fine("mv(-2)=" + mv); // range invalidDataInfo.setInvalidRange(mv); } else if (recordType2FixedPart1[3] == -3) { // mixed case dbgLog.fine("mv(-3)=" + 
mv); invalidDataInfo.setInvalidRange(mv.subList(0, 2)); invalidDataInfo.setInvalidValues(mv.subList(2, 3)); missingValueTable.put(variableName, mv.subList(2, 3)); } dbgLog.fine("missing value=" + StringUtils.join(missingValueTable.get(variableName), "|")); dbgLog.fine("invalidDataInfo(Numeric):\n" + invalidDataInfo); invalidDataTable.put(variableName, invalidDataInfo); } else { // string variable case String[] missingValues = new String[howManyMissingValueUnits]; List<String> mv = new ArrayList<String>(); int offset_start = 0; int offset_end = LENGTH_SAV_OBS_BLOCK; for (int i = 0; i < howManyMissingValueUnits; i++) { missingValues[i] = StringUtils.stripEnd(new String( Arrays.copyOfRange(missing_value_code_units, offset_start, offset_end), defaultCharSet), " "); dbgLog.fine("missing value=" + missingValues[i] + "<-"); offset_start = offset_end; offset_end += LENGTH_SAV_OBS_BLOCK; mv.add(missingValues[i]); } invalidDataInfo.setInvalidValues(mv); missingValueTable.put(variableName, mv); invalidDataTable.put(variableName, invalidDataInfo); dbgLog.fine( "missing value(str)=" + StringUtils.join(missingValueTable.get(variableName), "|")); dbgLog.fine("invalidDataInfo(String):\n" + invalidDataInfo); } // string case dbgLog.fine("invalidDataTable:\n" + invalidDataTable); } // if msv } catch (IOException ex) { //ex.printStackTrace(); throw ex; } catch (Exception ex) { ex.printStackTrace(); // should we be throwing some exception here? 
} } // j-loop if (j == OBSUnitsPerCase) { dbgLog.fine("RT2 metadata-related exit-chores"); smd.getFileInformation().put("varQnty", variableCounter); varQnty = variableCounter; dbgLog.fine("RT2: varQnty=" + varQnty); smd.setVariableName(variableNameList.toArray(new String[variableNameList.size()])); smd.setVariableLabel(variableLabelMap); smd.setMissingValueTable(missingValueTable); smd.getFileInformation().put("caseWeightVariableName", caseWeightVariableName); dbgLog.info("sumstat:long case=" + Arrays.deepToString(variableTypelList.toArray())); smd.setVariableFormat(printFormatList); smd.setVariableFormatName(printFormatNameTable); dbgLog.info("<<<<<<"); dbgLog.info("printFormatList = " + printFormatList); dbgLog.info("printFormatNameTable = " + printFormatNameTable); // dbgLog.info("formatCategoryTable = " + formatCategoryTable); dbgLog.info(">>>>>>"); dbgLog.fine("RT2: OBSwiseTypelList=" + OBSwiseTypelList); // variableType is determined after the valueTable is finalized } else { dbgLog.info("RT2: attention! didn't reach the end of the OBS list!"); throw new IOException("RT2: didn't reach the end of the OBS list!"); } dbgLog.fine("***** decodeRecordType2(): end *****"); }
From source file:com.jaspersoft.jasperserver.ws.axis2.ManagementServiceImpl.java
/**
 * Collects the JasperReports parameters declared by the JRXML behind a report unit's
 * data source.
 *
 * <p>The JRXML is looked up at the data source reference URI (trailing slashes removed)
 * with the suffix {@code "_files/topicJRXML"}. An empty map is returned when the report
 * unit has no data source, the data source has no reference URI, or no JRXML resource is
 * found at that location.
 *
 * @param reportReference descriptor whose URI identifies the report unit
 * @return the parameters of the data source's JRXML report; empty when there are none to
 *         read
 */
private Map<String, JRParameter> getJRParametersFromDatasource(ResourceDescriptor reportReference) {
    // Raw types are kept on purpose: the element type returned by getReportJRParameters
    // is not visible here.
    final Map result = new HashMap();

    final ReportUnit unit = (ReportUnit) repository.getResource(null, reportReference.getUriString(),
            ReportUnit.class);
    if (unit.getDataSource() == null) {
        return result;
    }
    final String dataSourceUri = unit.getDataSource().getReferenceURI();
    if (dataSourceUri == null) {
        return result;
    }

    final String jrxmlLocation = StringUtils.stripEnd(dataSourceUri, "/") + "_files/topicJRXML";
    final FileResource jrxml = (FileResource) reportLoadingService.getRepositoryService()
            .getResource(ExecutionContextImpl.getRuntimeExecutionContext(), jrxmlLocation, FileResource.class);
    if (jrxml == null) {
        return result;
    }

    final JRReport jasperReport = reportLoadingService.getJasperReport(null, jrxml, false);
    result.putAll(getReportJRParameters(jasperReport));
    return result;
}
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SAVFileReader.java
void decodeRecordType2(BufferedInputStream stream) throws IOException { dbgLog.fine("decodeRecordType2(): start"); if (stream == null) { throw new IllegalArgumentException("stream == null!"); }//w ww . ja va 2s.c om Map<String, String> printFormatNameTable = new LinkedHashMap<String, String>(); Map<String, String> variableLabelMap = new LinkedHashMap<String, String>(); Map<String, List<String>> missingValueTable = new LinkedHashMap<String, List<String>>(); List<Integer> printFormatList = new ArrayList<Integer>(); String caseWeightVariableName = null; int caseWeightVariableIndex = 0; boolean lastVariableIsExtendable = false; boolean extendedVariableMode = false; boolean obs255 = false; String lastVariableName = null; String lastExtendedVariable = null; // this field repeats as many as the number of variables in // this sav file // (note that the above statement is not technically correct, this // record repeats not just for every variable in the file, but for // every OBS (8 byte unit); i.e., if a string is split into multiple // OBS units, each one will have its own RT2 record -- L.A.). // Each field constists of a fixed (32-byte) segment and // then a few variable segments: // if the variable has a label (3rd INT4 set to 1), then there's 4 more // bytes specifying the length of the label, and then that many bytes // holding the label itself (no more than 256). // Then if there are optional missing value units (4th INT4 set to 1) // there will be 3 more OBS units attached = 24 extra bytes. 
int variableCounter = 0; int obsSeqNumber = 0; int j; dbgLog.fine("RT2: Reading " + OBSUnitsPerCase + " OBS units."); for (j = 0; j < OBSUnitsPerCase; j++) { dbgLog.fine("RT2: " + j + "-th RT2 unit is being decoded."); // 2.0: read the fixed[=non-optional] 32-byte segment byte[] recordType2Fixed = new byte[LENGTH_RECORDTYPE2_FIXED]; try { int nbytes = stream.read(recordType2Fixed, 0, LENGTH_RECORDTYPE2_FIXED); //printHexDump(recordType2Fixed, "recordType2 part 1"); if (nbytes == 0) { throw new IOException("reading recordType2: no bytes read!"); } int offset = 0; // 2.1: create int-view of the bytebuffer for the first 16-byte segment int rt2_1st_4_units = 4; ByteBuffer[] bb_record_type2_fixed_part1 = new ByteBuffer[rt2_1st_4_units]; int[] recordType2FixedPart1 = new int[rt2_1st_4_units]; for (int i = 0; i < rt2_1st_4_units; i++) { bb_record_type2_fixed_part1[i] = ByteBuffer.wrap(recordType2Fixed, offset, LENGTH_SAV_INT_BLOCK); offset += LENGTH_SAV_INT_BLOCK; if (isLittleEndian) { bb_record_type2_fixed_part1[i].order(ByteOrder.LITTLE_ENDIAN); } recordType2FixedPart1[i] = bb_record_type2_fixed_part1[i].getInt(); } ///dbgLog.fine("recordType2FixedPart="+ /// ReflectionToStringBuilder.toString(recordType2FixedPart1, ToStringStyle.MULTI_LINE_STYLE)); // 1st ([0]) element must be 2 otherwise no longer Record Type 2 if (recordType2FixedPart1[0] != 2) { dbgLog.warning(j + "-th RT header value is no longet RT2! " + recordType2FixedPart1[0]); break; } dbgLog.fine("variable type[must be 2]=" + recordType2FixedPart1[0]); // 2.3 variable name: 8 byte(space[x20]-padded) // This field is located at the very end of the 32 byte // fixed-size RT2 header (bytes 24-31). 
// We are processing it now, so that // we can make the decision on whether this variable is part // of a compound variable: String RawVariableName = getNullStrippedString(new String( Arrays.copyOfRange(recordType2Fixed, 24, (24 + LENGTH_VARIABLE_NAME)), defaultCharSet)); //offset +=LENGTH_VARIABLE_NAME; String variableName = null; if (RawVariableName.indexOf(' ') >= 0) { variableName = RawVariableName.substring(0, RawVariableName.indexOf(' ')); } else { variableName = RawVariableName; } // 2nd ([1]) element: numeric variable = 0 :for string variable // this block indicates its datum-length, i.e, >0 ; // if -1, this RT2 unit is a non-1st RT2 unit for a string variable // whose value is longer than 8 character. boolean isNumericVariable = false; dbgLog.fine("variable type(0: numeric; > 0: String;-1 continue )=" + recordType2FixedPart1[1]); //OBSwiseTypelList.add(recordType2FixedPart1[1]); int HowManyRt2Units = 1; if (recordType2FixedPart1[1] == -1) { dbgLog.fine("this RT2 is an 8 bit continuation chunk of an earlier string variable"); if (obs255) { if (obsSeqNumber < 30) { OBSwiseTypelList.add(recordType2FixedPart1[1]); obsSeqNumber++; } else { OBSwiseTypelList.add(-2); obs255 = false; obsSeqNumber = 0; } } else { OBSwiseTypelList.add(recordType2FixedPart1[1]); } obsNonVariableBlockSet.add(j); continue; } else if (recordType2FixedPart1[1] == 0) { // This is a numeric variable extendedVariableMode = false; // And as such, it cannot be an extension of a // previous, long string variable. OBSwiseTypelList.add(recordType2FixedPart1[1]); variableCounter++; isNumericVariable = true; variableTypelList.add(recordType2FixedPart1[1]); } else if (recordType2FixedPart1[1] > 0) { // This looks like a regular string variable. However, // it may still be a part of a compound variable // (a String > 255 bytes that was split into 255 byte // chunks, stored as individual String variables). 
if (recordType2FixedPart1[1] == 255) { obs255 = true; } if (lastVariableIsExtendable) { String varNameBase = null; if (lastVariableName.length() > 5) { varNameBase = lastVariableName.substring(0, 5); } else { varNameBase = lastVariableName; } if (extendedVariableMode) { if (variableNameIsAnIncrement(varNameBase, lastExtendedVariable, variableName)) { OBSwiseTypelList.add(-1); lastExtendedVariable = variableName; // OK, we stay in the "extended variable" mode; // but we can't move on to the next OBS (hence the commented out // "continue" below: //continue; // see the next comment below for the explanation. // // Should we also set "extendable" flag to false at this point // if it's shorter than 255 bytes, i.e. the last extended chunk? } else { extendedVariableMode = false; } } else { if (variableNameIsAnIncrement(varNameBase, variableName)) { OBSwiseTypelList.add(-1); extendedVariableMode = true; dbgLog.fine("RT2: in extended variable mode; variable " + variableName); lastExtendedVariable = variableName; // Before we move on to the next OBS unit, we need to check // if this current extended variable has its own label specified; // If so, we need to determine its length, then read and skip // that many bytes. // Hence the commented out "continue" below: //continue; } } } if (!extendedVariableMode) { // OK, this is a "real" // string variable, and not a continuation chunk of a compound // string. OBSwiseTypelList.add(recordType2FixedPart1[1]); variableCounter++; if (recordType2FixedPart1[1] == 255) { // This variable is 255 bytes long, i.e. this is // either the single "atomic" variable of the // max allowed size, or it's a 255 byte segment // of a compound variable. So we will check // the next variable and see if it is the continuation // of this one. 
lastVariableIsExtendable = true; } else { lastVariableIsExtendable = false; } if (recordType2FixedPart1[1] % LENGTH_SAV_OBS_BLOCK == 0) { HowManyRt2Units = recordType2FixedPart1[1] / LENGTH_SAV_OBS_BLOCK; } else { HowManyRt2Units = recordType2FixedPart1[1] / LENGTH_SAV_OBS_BLOCK + 1; } variableTypelList.add(recordType2FixedPart1[1]); } } if (!extendedVariableMode) { // Again, we only want to do the following steps for the "real" // variables, not the chunks of split mega-variables: dbgLog.fine("RT2: HowManyRt2Units for this variable=" + HowManyRt2Units); lastVariableName = variableName; // caseWeightVariableOBSIndex starts from 1: 0 is used for does-not-exist cases if (j == (caseWeightVariableOBSIndex - 1)) { caseWeightVariableName = variableName; // TODO: do we need this "index"? -- 4.0 alpha caseWeightVariableIndex = variableCounter; ///smd.setCaseWeightVariableName(caseWeightVariableName); ///smd.getFileInformation().put("caseWeightVariableIndex", caseWeightVariableIndex); } OBSIndexToVariableName.put(j, variableName); //dbgLog.fine("\nvariable name="+variableName+"<-"); dbgLog.fine("RT2: " + j + "-th variable name=" + variableName + "<-"); dbgLog.fine("RT2: raw variable: " + RawVariableName); variableNameList.add(variableName); } // 3rd ([2]) element: = 1 variable-label block follows; 0 = no label // dbgLog.fine("RT: variable label follows?(1:yes; 0: no)=" + recordType2FixedPart1[2]); boolean hasVariableLabel = recordType2FixedPart1[2] == 1 ? 
true : false; if ((recordType2FixedPart1[2] != 0) && (recordType2FixedPart1[2] != 1)) { throw new IOException("RT2: reading error: value is neither 0 or 1" + recordType2FixedPart1[2]); } // 2.4 [optional]The length of a variable label followed: 4-byte int // 3rd element of 2.1 indicates whether this field exists // *** warning: The label block is padded to a multiple of the 4-byte // NOT the raw integer value of this 4-byte block if (hasVariableLabel) { byte[] length_variable_label = new byte[4]; int nbytes_2_4 = stream.read(length_variable_label); if (nbytes_2_4 == 0) { throw new IOException("RT 2: error reading recordType2.4: no bytes read!"); } else { dbgLog.fine("nbytes_2_4=" + nbytes_2_4); } ByteBuffer bb_length_variable_label = ByteBuffer.wrap(length_variable_label, 0, LENGTH_VARIABLE_LABEL); if (isLittleEndian) { bb_length_variable_label.order(ByteOrder.LITTLE_ENDIAN); } int rawVariableLabelLength = bb_length_variable_label.getInt(); dbgLog.fine("rawVariableLabelLength=" + rawVariableLabelLength); int variableLabelLength = getSAVintAdjustedBlockLength(rawVariableLabelLength); dbgLog.fine("RT2: variableLabelLength=" + variableLabelLength); // 2.5 [optional]variable label whose length is found at 2.4 String variableLabel = ""; if (rawVariableLabelLength > 0) { byte[] variable_label = new byte[variableLabelLength]; int nbytes_2_5 = stream.read(variable_label); if (nbytes_2_5 == 0) { throw new IOException("RT 2: error reading recordType2.5: " + variableLabelLength + " bytes requested, no bytes read!"); } else { dbgLog.fine("nbytes_2_5=" + nbytes_2_5); } variableLabel = getNullStrippedString(new String( Arrays.copyOfRange(variable_label, 0, rawVariableLabelLength), defaultCharSet)); dbgLog.fine("RT2: variableLabel=" + variableLabel + "<-"); dbgLog.fine(variableName + " => " + variableLabel); } else { dbgLog.fine("RT2: defaulting to empty variable label."); } if (!extendedVariableMode) { // We only have any use for this label if it's a "real" variable. 
// Thinking about it, it doesn't make much sense for the "fake" // variables that are actually chunks of large strings to store // their own labels. But in some files they do. Then failing to read // the bytes would result in getting out of sync with the RT record // borders. So we always read the bytes, but only use them for // the real variable entries. /*String variableLabel = new String(Arrays.copyOfRange(variable_label, 0, rawVariableLabelLength),"US-ASCII");*/ variableLabelMap.put(variableName, variableLabel); } } if (extendedVariableMode) { // there's nothing else left for us to do in this iteration of the loop. // Once again, this was not a real variable, but a dummy variable entry // created for a chunk of a string variable longer than 255 bytes -- // that's how SPSS stores them. continue; } // 4th ([3]) element: Missing value type code // 0[none], 1, 2, 3 [point-type],-2[range], -3 [range type+ point] dbgLog.fine("RT: missing value unit follows?(if 0, none)=" + recordType2FixedPart1[3]); boolean hasMissingValues = (validMissingValueCodeSet.contains(recordType2FixedPart1[3]) && (recordType2FixedPart1[3] != 0)) ? true : false; InvalidData invalidDataInfo = null; if (recordType2FixedPart1[3] != 0) { invalidDataInfo = new InvalidData(recordType2FixedPart1[3]); dbgLog.fine("RT: missing value type=" + invalidDataInfo.getType()); } // 2.2: print/write formats: 4-byte each = 8 bytes byte[] printFormt = Arrays.copyOfRange(recordType2Fixed, offset, offset + LENGTH_PRINT_FORMAT_CODE); dbgLog.fine("printFrmt=" + new String(Hex.encodeHex(printFormt))); offset += LENGTH_PRINT_FORMAT_CODE; int formatCode = isLittleEndian ? printFormt[2] : printFormt[1]; int formatWidth = isLittleEndian ? printFormt[1] : printFormt[2]; // TODO: // What should we be doing with these "format decimal positions" // in 4.0? // -- L.A. 4.0 alpha int formatDecimalPointPosition = isLittleEndian ? 
printFormt[0] : printFormt[3]; dbgLog.fine("RT2: format code{5=F, 1=A[String]}=" + formatCode); formatDecimalPointPositionList.add(formatDecimalPointPosition); if (!SPSSConstants.FORMAT_CODE_TABLE_SAV.containsKey(formatCode)) { throw new IOException("Unknown format code was found = " + formatCode); } else { printFormatList.add(formatCode); } byte[] writeFormt = Arrays.copyOfRange(recordType2Fixed, offset, offset + LENGTH_WRITE_FORMAT_CODE); dbgLog.fine("RT2: writeFrmt=" + new String(Hex.encodeHex(writeFormt))); if (writeFormt[3] != 0x00) { dbgLog.fine("byte-order(write format): reversal required"); } offset += LENGTH_WRITE_FORMAT_CODE; if (!SPSSConstants.ORDINARY_FORMAT_CODE_SET.contains(formatCode)) { StringBuilder sb = new StringBuilder( SPSSConstants.FORMAT_CODE_TABLE_SAV.get(formatCode) + formatWidth); if (formatDecimalPointPosition > 0) { sb.append("." + formatDecimalPointPosition); } dbgLog.fine("formattable[i] = " + variableName + " -> " + sb.toString()); printFormatNameTable.put(variableName, sb.toString()); } printFormatTable.put(variableName, SPSSConstants.FORMAT_CODE_TABLE_SAV.get(formatCode)); // 2.6 [optional] missing values:4-byte each if exists // 4th element of 2.1 indicates the structure of this sub-field // Should we perhaps check for this for the "fake" variables too? // if (hasMissingValues) { dbgLog.fine("RT2: decoding missing value: type=" + recordType2FixedPart1[3]); int howManyMissingValueUnits = missingValueCodeUnits.get(recordType2FixedPart1[3]); //int howManyMissingValueUnits = recordType2FixedPart1[3] > 0 ? 
recordType2FixedPart1[3] : 0; dbgLog.fine("RT2: howManyMissingValueUnits=" + howManyMissingValueUnits); byte[] missing_value_code_units = new byte[LENGTH_SAV_OBS_BLOCK * howManyMissingValueUnits]; int nbytes_2_6 = stream.read(missing_value_code_units); if (nbytes_2_6 == 0) { throw new IOException("RT 2: reading recordType2.6: no byte was read"); } else { dbgLog.fine("nbytes_2_6=" + nbytes_2_6); } //printHexDump(missing_value_code_units, "missing value"); if (isNumericVariable) { double[] missingValues = new double[howManyMissingValueUnits]; //List<String> mvp = new ArrayList<String>(); List<String> mv = new ArrayList<String>(); ByteBuffer[] bb_missig_value_code = new ByteBuffer[howManyMissingValueUnits]; int offset_start = 0; for (int i = 0; i < howManyMissingValueUnits; i++) { bb_missig_value_code[i] = ByteBuffer.wrap(missing_value_code_units, offset_start, LENGTH_SAV_OBS_BLOCK); offset_start += LENGTH_SAV_OBS_BLOCK; if (isLittleEndian) { bb_missig_value_code[i].order(ByteOrder.LITTLE_ENDIAN); } ByteBuffer temp = bb_missig_value_code[i].duplicate(); missingValues[i] = bb_missig_value_code[i].getDouble(); if (Double.toHexString(missingValues[i]).equals("-0x1.ffffffffffffep1023")) { dbgLog.fine("1st value is LOWEST"); mv.add(Double.toHexString(missingValues[i])); } else if (Double.valueOf(missingValues[i]).equals(Double.MAX_VALUE)) { dbgLog.fine("2nd value is HIGHEST"); mv.add(Double.toHexString(missingValues[i])); } else { mv.add(doubleNumberFormatter.format(missingValues[i])); } dbgLog.fine(i + "-th missing value=" + Double.toHexString(missingValues[i])); } dbgLog.fine("variableName=" + variableName); if (recordType2FixedPart1[3] > 0) { // point cases only dbgLog.fine("mv(>0)=" + mv); missingValueTable.put(variableName, mv); invalidDataInfo.setInvalidValues(mv); } else if (recordType2FixedPart1[3] == -2) { dbgLog.fine("mv(-2)=" + mv); // range invalidDataInfo.setInvalidRange(mv); } else if (recordType2FixedPart1[3] == -3) { // mixed case dbgLog.fine("mv(-3)=" + 
mv); invalidDataInfo.setInvalidRange(mv.subList(0, 2)); invalidDataInfo.setInvalidValues(mv.subList(2, 3)); missingValueTable.put(variableName, mv.subList(2, 3)); } dbgLog.fine("missing value=" + StringUtils.join(missingValueTable.get(variableName), "|")); dbgLog.fine("invalidDataInfo(Numeric):\n" + invalidDataInfo); invalidDataTable.put(variableName, invalidDataInfo); } else { // string variable case String[] missingValues = new String[howManyMissingValueUnits]; List<String> mv = new ArrayList<String>(); int offset_start = 0; int offset_end = LENGTH_SAV_OBS_BLOCK; for (int i = 0; i < howManyMissingValueUnits; i++) { missingValues[i] = StringUtils.stripEnd(new String( Arrays.copyOfRange(missing_value_code_units, offset_start, offset_end), defaultCharSet), " "); dbgLog.fine("missing value=" + missingValues[i] + "<-"); offset_start = offset_end; offset_end += LENGTH_SAV_OBS_BLOCK; mv.add(missingValues[i]); } invalidDataInfo.setInvalidValues(mv); missingValueTable.put(variableName, mv); invalidDataTable.put(variableName, invalidDataInfo); dbgLog.fine( "missing value(str)=" + StringUtils.join(missingValueTable.get(variableName), "|")); dbgLog.fine("invalidDataInfo(String):\n" + invalidDataInfo); } // string case dbgLog.fine("invalidDataTable:\n" + invalidDataTable); } // if msv } catch (IOException ex) { //ex.printStackTrace(); throw ex; } catch (Exception ex) { ex.printStackTrace(); // should we be throwing some exception here? } } // j-loop if (j != OBSUnitsPerCase) { dbgLog.fine("RT2: attention! 
didn't reach the end of the OBS list!"); throw new IOException("RT2: didn't reach the end of the OBS list!"); } dbgLog.fine("RT2 metadata-related exit-chores"); ///smd.getFileInformation().put("varQnty", variableCounter); dataTable.setVarQuantity(new Long(variableCounter)); dbgLog.fine("RT2: varQnty=" + variableCounter); // 4.0 Initialize variables: List<DataVariable> variableList = new ArrayList<DataVariable>(); for (int i = 0; i < variableCounter; i++) { DataVariable dv = new DataVariable(); String varName = variableNameList.get(i); dbgLog.fine("name: " + varName); dv.setName(varName); String varLabel = variableLabelMap.get(varName); if (varLabel != null && varLabel.length() > 255) { // TODO: // variable labels will be changed into type 'TEXT' in the // database - this will eliminate the 255 char. limit. // -- L.A. 4.0 beta11 dbgLog.fine("Have to truncate label: " + varLabel); varLabel = varLabel.substring(0, 255); } dbgLog.fine("label: " + varLabel); dv.setLabel(varLabel); dv.setInvalidRanges(new ArrayList<VariableRange>()); dv.setSummaryStatistics(new ArrayList<SummaryStatistic>()); dv.setUnf("UNF:6:"); dv.setCategories(new ArrayList<VariableCategory>()); variableList.add(dv); dv.setFileOrder(i); dv.setDataTable(dataTable); } dataTable.setDataVariables(variableList); ///smd.setVariableName(variableNameList.toArray(new String[variableNameList.size()])); ///smd.setVariableLabel(variableLabelMap); // TODO: // figure out what to do with the missing value table! // -- 4.0 alpha // well, they were used to generate merged summary statistics for // the variable. So need to verify what the DDI import was doing // with them and replicate the same in 4.0. // (add appropriate value labels?) 
///TODO: 4.0 smd.setMissingValueTable(missingValueTable); ///smd.getFileInformation().put("caseWeightVariableName", caseWeightVariableName); dbgLog.fine("sumstat:long case=" + Arrays.deepToString(variableTypelList.toArray())); dbgLog.fine("RT2: OBSwiseTypelList=" + OBSwiseTypelList); dbgLog.fine("decodeRecordType2(): end"); }
From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.sav.SAVFileReader.java
void decodeRecordType3and4(BufferedInputStream stream) throws IOException { dbgLog.fine("***** decodeRecordType3and4(): start *****"); Map<String, Map<String, String>> valueLabelTable = new LinkedHashMap<String, Map<String, String>>(); int safteyCounter = 0; while (true) { try {/*from w w w . j av a2s.c o m*/ if (stream == null) { throw new IllegalArgumentException("stream == null!"); } // this secton may not exit so first check the 4-byte header value //if (stream.markSupported()){ stream.mark(1000); //} // 3.0 check the first 4 bytes byte[] headerCode = new byte[LENGTH_RECORD_TYPE3_CODE]; int nbytes_rt3 = stream.read(headerCode, 0, LENGTH_RECORD_TYPE3_CODE); // to-do check against nbytes //printHexDump(headerCode, "RT3 header test"); ByteBuffer bb_header_code = ByteBuffer.wrap(headerCode, 0, LENGTH_RECORD_TYPE3_CODE); if (isLittleEndian) { bb_header_code.order(ByteOrder.LITTLE_ENDIAN); } int intRT3test = bb_header_code.getInt(); dbgLog.fine("header test value: RT3=" + intRT3test); if (intRT3test != 3) { //if (stream.markSupported()){ dbgLog.fine("iteration=" + safteyCounter); // We have encountered a record that's not type 3. This means we've // processed all the type 3/4 record pairs. So we want to rewind // the stream and return -- so that the appropriate record type // reader can be called on it. 
// But before we return, we need to save all the value labels // we have found: smd.setValueLabelTable(valueLabelTable); stream.reset(); return; //} } // 3.1 how many value-label pairs follow byte[] number_of_labels = new byte[LENGTH_RT3_HOW_MANY_LABELS]; int nbytes_3_1 = stream.read(number_of_labels); if (nbytes_3_1 == 0) { throw new IOException("RT 3: reading recordType3.1: no byte was read"); } ByteBuffer bb_number_of_labels = ByteBuffer.wrap(number_of_labels, 0, LENGTH_RT3_HOW_MANY_LABELS); if (isLittleEndian) { bb_number_of_labels.order(ByteOrder.LITTLE_ENDIAN); } int numberOfValueLabels = bb_number_of_labels.getInt(); dbgLog.fine("number of value-label pairs=" + numberOfValueLabels); ByteBuffer[] tempBB = new ByteBuffer[numberOfValueLabels]; String valueLabel[] = new String[numberOfValueLabels]; for (int i = 0; i < numberOfValueLabels; i++) { // read 8-byte as value byte[] value = new byte[LENGTH_RT3_VALUE]; int nbytes_3_value = stream.read(value); if (nbytes_3_value == 0) { throw new IOException("RT 3: reading recordType3 value: no byte was read"); } // note these 8 bytes are interpreted later // currently no information about which variable's (=> type unknown) ByteBuffer bb_value = ByteBuffer.wrap(value, 0, LENGTH_RT3_VALUE); if (isLittleEndian) { bb_value.order(ByteOrder.LITTLE_ENDIAN); } tempBB[i] = bb_value; dbgLog.fine("bb_value=" + Hex.encodeHex(bb_value.array())); /* double valueD = bb_value.getDouble(); dbgLog.fine("value="+valueD); */ // read 1st byte as unsigned integer = label_length // read label_length byte as label byte[] labelLengthByte = new byte[LENGTH_RT3_LABEL_LENGTH]; int nbytes_3_label_length = stream.read(labelLengthByte); // add check-routine here dbgLog.fine("labelLengthByte" + Hex.encodeHex(labelLengthByte)); dbgLog.fine("label length = " + labelLengthByte[0]); // the net-length of a value label is saved as // unsigned byte; however, the length is less than 127 // byte should be ok int rawLabelLength = labelLengthByte[0] & 0xFF; 
dbgLog.fine("rawLabelLength=" + rawLabelLength); // -1 =>1-byte already read int labelLength = getSAVobsAdjustedBlockLength(rawLabelLength + 1) - 1; byte[] valueLabelBytes = new byte[labelLength]; int nbytes_3_value_label = stream.read(valueLabelBytes); // ByteBuffer bb_label = ByteBuffer.wrap(valueLabel,0,labelLength); valueLabel[i] = StringUtils.stripEnd( new String(Arrays.copyOfRange(valueLabelBytes, 0, rawLabelLength), defaultCharSet), " "); dbgLog.fine(i + "-th valueLabel=" + valueLabel[i] + "<-"); } // iter rt3 dbgLog.fine("end of RT3 block"); dbgLog.fine("start of RT4 block"); // 4.0 check the first 4 bytes byte[] headerCode4 = new byte[LENGTH_RECORD_TYPE4_CODE]; int nbytes_rt4 = stream.read(headerCode4, 0, LENGTH_RECORD_TYPE4_CODE); if (nbytes_rt4 == 0) { throw new IOException("RT4: reading recordType4 value: no byte was read"); } //printHexDump(headerCode4, "RT4 header test"); ByteBuffer bb_header_code_4 = ByteBuffer.wrap(headerCode4, 0, LENGTH_RECORD_TYPE4_CODE); if (isLittleEndian) { bb_header_code_4.order(ByteOrder.LITTLE_ENDIAN); } int intRT4test = bb_header_code_4.getInt(); dbgLog.fine("header test value: RT4=" + intRT4test); if (intRT4test != 4) { throw new IOException("RT 4: reading recordType4 header: no byte was read"); } // 4.1 read the how-many-variables bytes byte[] howManyVariablesfollow = new byte[LENGTH_RT4_HOW_MANY_VARIABLES]; int nbytes_rt4_1 = stream.read(howManyVariablesfollow, 0, LENGTH_RT4_HOW_MANY_VARIABLES); ByteBuffer bb_howManyVariablesfollow = ByteBuffer.wrap(howManyVariablesfollow, 0, LENGTH_RT4_HOW_MANY_VARIABLES); if (isLittleEndian) { bb_howManyVariablesfollow.order(ByteOrder.LITTLE_ENDIAN); } int howManyVariablesRT4 = bb_howManyVariablesfollow.getInt(); dbgLog.fine("how many variables follow: RT4=" + howManyVariablesRT4); int length_indicies = LENGTH_RT4_VARIABLE_INDEX * howManyVariablesRT4; byte[] variableIdicesBytes = new byte[length_indicies]; int nbytes_rt4_2 = stream.read(variableIdicesBytes, 0, length_indicies); // 
!!!!! Caution: variableIndex in RT4 starts from 1 NOT ** 0 ** int[] variableIndex = new int[howManyVariablesRT4]; int offset = 0; for (int i = 0; i < howManyVariablesRT4; i++) { ByteBuffer bb_variable_index = ByteBuffer.wrap(variableIdicesBytes, offset, LENGTH_RT4_VARIABLE_INDEX); offset += LENGTH_RT4_VARIABLE_INDEX; if (isLittleEndian) { bb_variable_index.order(ByteOrder.LITTLE_ENDIAN); } variableIndex[i] = bb_variable_index.getInt(); dbgLog.fine(i + "-th variable index number=" + variableIndex[i]); } dbgLog.fine("variable index set=" + ArrayUtils.toString(variableIndex)); dbgLog.fine("subtract 1 from variableIndex for getting a variable info"); boolean isNumeric = OBSwiseTypelList.get(variableIndex[0] - 1) == 0 ? true : false; Map<String, String> valueLabelPair = new LinkedHashMap<String, String>(); if (isNumeric) { // numeric variable dbgLog.fine("processing of a numeric value-label table"); for (int j = 0; j < numberOfValueLabels; j++) { valueLabelPair.put(doubleNumberFormatter.format(tempBB[j].getDouble()), valueLabel[j]); } } else { // String variable dbgLog.fine("processing of a string value-label table"); for (int j = 0; j < numberOfValueLabels; j++) { valueLabelPair.put( StringUtils.stripEnd(new String((tempBB[j].array()), defaultCharSet), " "), valueLabel[j]); } } dbgLog.fine("valueLabePair=" + valueLabelPair); dbgLog.fine("key variable's (raw) index =" + variableIndex[0]); valueLabelTable.put(OBSIndexToVariableName.get(variableIndex[0] - 1), valueLabelPair); dbgLog.fine("valueLabelTable=" + valueLabelTable); // create a mapping table that finds the key variable for this mapping table String keyVariableName = OBSIndexToVariableName.get(variableIndex[0] - 1); for (int vn : variableIndex) { valueVariableMappingTable.put(OBSIndexToVariableName.get(vn - 1), keyVariableName); } dbgLog.fine("valueVariableMappingTable:\n" + valueVariableMappingTable); } catch (IOException ex) { //ex.printStackTrace(); throw ex; } safteyCounter++; if (safteyCounter >= 1000000) { 
break; } } //while smd.setValueLabelTable(valueLabelTable); dbgLog.fine("***** decodeRecordType3and4(): end *****"); }
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SAVFileReader.java
void decodeRecordType3and4(BufferedInputStream stream) throws IOException { dbgLog.fine("decodeRecordType3and4(): start"); Map<String, Map<String, String>> valueLabelTable = new LinkedHashMap<String, Map<String, String>>(); int safteyCounter = 0; while (true) { try {//w w w.ja v a 2 s . c o m if (stream == null) { throw new IllegalArgumentException("stream == null!"); } // this secton may not exit so first check the 4-byte header value //if (stream.markSupported()){ stream.mark(1000); //} // 3.0 check the first 4 bytes byte[] headerCode = new byte[LENGTH_RECORD_TYPE3_CODE]; int nbytes_rt3 = stream.read(headerCode, 0, LENGTH_RECORD_TYPE3_CODE); // to-do check against nbytes //printHexDump(headerCode, "RT3 header test"); ByteBuffer bb_header_code = ByteBuffer.wrap(headerCode, 0, LENGTH_RECORD_TYPE3_CODE); if (isLittleEndian) { bb_header_code.order(ByteOrder.LITTLE_ENDIAN); } int intRT3test = bb_header_code.getInt(); dbgLog.fine("header test value: RT3=" + intRT3test); if (intRT3test != 3) { //if (stream.markSupported()){ dbgLog.fine("iteration=" + safteyCounter); // We have encountered a record that's not type 3. This means we've // processed all the type 3/4 record pairs. So we want to rewind // the stream and return -- so that the appropriate record type // reader can be called on it. 
// But before we return, we need to save all the value labels // we have found: //smd.setValueLabelTable(valueLabelTable); assignValueLabels(valueLabelTable); stream.reset(); return; //} } // 3.1 how many value-label pairs follow byte[] number_of_labels = new byte[LENGTH_RT3_HOW_MANY_LABELS]; int nbytes_3_1 = stream.read(number_of_labels); if (nbytes_3_1 == 0) { throw new IOException("RT 3: reading recordType3.1: no byte was read"); } ByteBuffer bb_number_of_labels = ByteBuffer.wrap(number_of_labels, 0, LENGTH_RT3_HOW_MANY_LABELS); if (isLittleEndian) { bb_number_of_labels.order(ByteOrder.LITTLE_ENDIAN); } int numberOfValueLabels = bb_number_of_labels.getInt(); dbgLog.fine("number of value-label pairs=" + numberOfValueLabels); ByteBuffer[] tempBB = new ByteBuffer[numberOfValueLabels]; String valueLabel[] = new String[numberOfValueLabels]; for (int i = 0; i < numberOfValueLabels; i++) { // read 8-byte as value byte[] value = new byte[LENGTH_RT3_VALUE]; int nbytes_3_value = stream.read(value); if (nbytes_3_value == 0) { throw new IOException("RT 3: reading recordType3 value: no byte was read"); } // note these 8 bytes are interpreted later // currently no information about which variable's (=> type unknown) ByteBuffer bb_value = ByteBuffer.wrap(value, 0, LENGTH_RT3_VALUE); if (isLittleEndian) { bb_value.order(ByteOrder.LITTLE_ENDIAN); } tempBB[i] = bb_value; dbgLog.fine("bb_value=" + Hex.encodeHex(bb_value.array())); /* double valueD = bb_value.getDouble(); dbgLog.fine("value="+valueD); */ // read 1st byte as unsigned integer = label_length // read label_length byte as label byte[] labelLengthByte = new byte[LENGTH_RT3_LABEL_LENGTH]; int nbytes_3_label_length = stream.read(labelLengthByte); // add check-routine here dbgLog.fine("labelLengthByte" + Hex.encodeHex(labelLengthByte)); dbgLog.fine("label length = " + labelLengthByte[0]); // the net-length of a value label is saved as // unsigned byte; however, the length is less than 127 // byte should be ok int 
rawLabelLength = labelLengthByte[0] & 0xFF; dbgLog.fine("rawLabelLength=" + rawLabelLength); // -1 =>1-byte already read int labelLength = getSAVobsAdjustedBlockLength(rawLabelLength + 1) - 1; byte[] valueLabelBytes = new byte[labelLength]; int nbytes_3_value_label = stream.read(valueLabelBytes); // ByteBuffer bb_label = ByteBuffer.wrap(valueLabel,0,labelLength); valueLabel[i] = StringUtils.stripEnd( new String(Arrays.copyOfRange(valueLabelBytes, 0, rawLabelLength), defaultCharSet), " "); dbgLog.fine(i + "-th valueLabel=" + valueLabel[i] + "<-"); } // iter rt3 dbgLog.fine("end of RT3 block"); dbgLog.fine("start of RT4 block"); // 4.0 check the first 4 bytes byte[] headerCode4 = new byte[LENGTH_RECORD_TYPE4_CODE]; int nbytes_rt4 = stream.read(headerCode4, 0, LENGTH_RECORD_TYPE4_CODE); if (nbytes_rt4 == 0) { throw new IOException("RT4: reading recordType4 value: no byte was read"); } //printHexDump(headerCode4, "RT4 header test"); ByteBuffer bb_header_code_4 = ByteBuffer.wrap(headerCode4, 0, LENGTH_RECORD_TYPE4_CODE); if (isLittleEndian) { bb_header_code_4.order(ByteOrder.LITTLE_ENDIAN); } int intRT4test = bb_header_code_4.getInt(); dbgLog.fine("header test value: RT4=" + intRT4test); if (intRT4test != 4) { throw new IOException("RT 4: reading recordType4 header: no byte was read"); } // 4.1 read the how-many-variables bytes byte[] howManyVariablesfollow = new byte[LENGTH_RT4_HOW_MANY_VARIABLES]; int nbytes_rt4_1 = stream.read(howManyVariablesfollow, 0, LENGTH_RT4_HOW_MANY_VARIABLES); ByteBuffer bb_howManyVariablesfollow = ByteBuffer.wrap(howManyVariablesfollow, 0, LENGTH_RT4_HOW_MANY_VARIABLES); if (isLittleEndian) { bb_howManyVariablesfollow.order(ByteOrder.LITTLE_ENDIAN); } int howManyVariablesRT4 = bb_howManyVariablesfollow.getInt(); dbgLog.fine("how many variables follow: RT4=" + howManyVariablesRT4); int length_indicies = LENGTH_RT4_VARIABLE_INDEX * howManyVariablesRT4; byte[] variableIdicesBytes = new byte[length_indicies]; int nbytes_rt4_2 = 
stream.read(variableIdicesBytes, 0, length_indicies); // !!!!! Caution: variableIndex in RT4 starts from 1 NOT ** 0 ** int[] variableIndex = new int[howManyVariablesRT4]; int offset = 0; for (int i = 0; i < howManyVariablesRT4; i++) { ByteBuffer bb_variable_index = ByteBuffer.wrap(variableIdicesBytes, offset, LENGTH_RT4_VARIABLE_INDEX); offset += LENGTH_RT4_VARIABLE_INDEX; if (isLittleEndian) { bb_variable_index.order(ByteOrder.LITTLE_ENDIAN); } variableIndex[i] = bb_variable_index.getInt(); dbgLog.fine(i + "-th variable index number=" + variableIndex[i]); } dbgLog.fine("variable index set=" + ArrayUtils.toString(variableIndex)); dbgLog.fine("subtract 1 from variableIndex for getting a variable info"); boolean isNumeric = OBSwiseTypelList.get(variableIndex[0] - 1) == 0 ? true : false; Map<String, String> valueLabelPair = new LinkedHashMap<String, String>(); if (isNumeric) { // numeric variable dbgLog.fine("processing of a numeric value-label table"); for (int j = 0; j < numberOfValueLabels; j++) { valueLabelPair.put(doubleNumberFormatter.format(tempBB[j].getDouble()), valueLabel[j]); } } else { // String variable dbgLog.fine("processing of a string value-label table"); for (int j = 0; j < numberOfValueLabels; j++) { valueLabelPair.put( StringUtils.stripEnd(new String((tempBB[j].array()), defaultCharSet), " "), valueLabel[j]); } } dbgLog.fine("valueLabePair=" + valueLabelPair); dbgLog.fine("key variable's (raw) index =" + variableIndex[0]); valueLabelTable.put(OBSIndexToVariableName.get(variableIndex[0] - 1), valueLabelPair); dbgLog.fine("valueLabelTable=" + valueLabelTable); // create a mapping table that finds the key variable for this mapping table String keyVariableName = OBSIndexToVariableName.get(variableIndex[0] - 1); for (int vn : variableIndex) { valueVariableMappingTable.put(OBSIndexToVariableName.get(vn - 1), keyVariableName); } dbgLog.fine("valueVariableMappingTable:\n" + valueVariableMappingTable); } catch (IOException ex) { //ex.printStackTrace(); 
throw ex; } safteyCounter++; if (safteyCounter >= 1000000) { break; } } //while ///smd.setValueLabelTable(valueLabelTable); assignValueLabels(valueLabelTable); dbgLog.fine("***** decodeRecordType3and4(): end *****"); }
From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.sav.SAVFileReader.java
void decodeRecordType6(BufferedInputStream stream) throws IOException { dbgLog.fine("***** decodeRecordType6(): start *****"); try {/* ww w.ja v a 2 s. co m*/ if (stream == null) { throw new IllegalArgumentException("stream == null!"); } // this secton may not exit so first check the 4-byte header value //if (stream.markSupported()){ stream.mark(1000); //} // 6.0 check the first 4 bytes byte[] headerCodeRt6 = new byte[LENGTH_RECORD_TYPE6_CODE]; int nbytes_rt6 = stream.read(headerCodeRt6, 0, LENGTH_RECORD_TYPE6_CODE); // to-do check against nbytes //printHexDump(headerCodeRt6, "RT6 header test"); ByteBuffer bb_header_code_rt6 = ByteBuffer.wrap(headerCodeRt6, 0, LENGTH_RECORD_TYPE6_CODE); if (isLittleEndian) { bb_header_code_rt6.order(ByteOrder.LITTLE_ENDIAN); } int intRT6test = bb_header_code_rt6.getInt(); dbgLog.fine("RT6: header test value=" + intRT6test); if (intRT6test != 6) { //if (stream.markSupported()){ //out.print("iteration="+safteyCounter); //dbgLog.fine("iteration="+safteyCounter); dbgLog.fine("intRT6test failed=" + intRT6test); stream.reset(); return; //} } // 6.1 check 4-byte integer that tells how many lines follow byte[] length_how_many_line_bytes = new byte[LENGTH_RT6_HOW_MANY_LINES]; int nbytes_rt6_1 = stream.read(length_how_many_line_bytes, 0, LENGTH_RT6_HOW_MANY_LINES); // to-do check against nbytes //printHexDump(length_how_many_line_bytes, "RT6 how_many_line_bytes"); ByteBuffer bb_how_many_lines = ByteBuffer.wrap(length_how_many_line_bytes, 0, LENGTH_RT6_HOW_MANY_LINES); if (isLittleEndian) { bb_how_many_lines.order(ByteOrder.LITTLE_ENDIAN); } int howManyLinesRt6 = bb_how_many_lines.getInt(); dbgLog.fine("how Many lines follow=" + howManyLinesRt6); // 6.2 read 80-char-long lines String[] documentRecord = new String[howManyLinesRt6]; for (int i = 0; i < howManyLinesRt6; i++) { byte[] line = new byte[80]; int nbytes_rt6_line = stream.read(line); documentRecord[i] = StringUtils.stripEnd( new String(Arrays.copyOfRange(line, 0, 
LENGTH_RT6_DOCUMENT_LINE), defaultCharSet), " "); dbgLog.fine(i + "-th line =" + documentRecord[i] + "<-"); } dbgLog.fine("documentRecord:\n" + StringUtils.join(documentRecord, "\n")); } catch (IOException ex) { //ex.printStackTrace(); throw ex; } dbgLog.fine("***** decodeRecordType6(): end *****"); }
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SAVFileReader.java
void decodeRecordType6(BufferedInputStream stream) throws IOException { dbgLog.fine("***** decodeRecordType6(): start *****"); try {/*from ww w . j a v a 2 s . c om*/ if (stream == null) { throw new IllegalArgumentException("stream == null!"); } // this section is optional; so let's first check the 4-byte header // value and see what type it is. //if (stream.markSupported()){ // -- ? L.A. 4.0 alpha stream.mark(1000); //} // 6.0 check the first 4 bytes byte[] headerCodeRt6 = new byte[LENGTH_RECORD_TYPE6_CODE]; int nbytes_rt6 = stream.read(headerCodeRt6, 0, LENGTH_RECORD_TYPE6_CODE); // to-do check against nbytes //printHexDump(headerCodeRt6, "RT6 header test"); ByteBuffer bb_header_code_rt6 = ByteBuffer.wrap(headerCodeRt6, 0, LENGTH_RECORD_TYPE6_CODE); if (isLittleEndian) { bb_header_code_rt6.order(ByteOrder.LITTLE_ENDIAN); } int intRT6test = bb_header_code_rt6.getInt(); dbgLog.fine("RT6: header test value=" + intRT6test); if (intRT6test != 6) { //if (stream.markSupported()){ //out.print("iteration="+safteyCounter); //dbgLog.fine("iteration="+safteyCounter); dbgLog.fine("intRT6test failed=" + intRT6test); stream.reset(); return; //} } // 6.1 check 4-byte integer that tells how many lines follow byte[] length_how_many_line_bytes = new byte[LENGTH_RT6_HOW_MANY_LINES]; int nbytes_rt6_1 = stream.read(length_how_many_line_bytes, 0, LENGTH_RT6_HOW_MANY_LINES); // to-do check against nbytes //printHexDump(length_how_many_line_bytes, "RT6 how_many_line_bytes"); ByteBuffer bb_how_many_lines = ByteBuffer.wrap(length_how_many_line_bytes, 0, LENGTH_RT6_HOW_MANY_LINES); if (isLittleEndian) { bb_how_many_lines.order(ByteOrder.LITTLE_ENDIAN); } int howManyLinesRt6 = bb_how_many_lines.getInt(); dbgLog.fine("how Many lines follow=" + howManyLinesRt6); // 6.2 read 80-char-long lines String[] documentRecord = new String[howManyLinesRt6]; for (int i = 0; i < howManyLinesRt6; i++) { byte[] line = new byte[80]; int nbytes_rt6_line = stream.read(line); documentRecord[i] = 
StringUtils.stripEnd( new String(Arrays.copyOfRange(line, 0, LENGTH_RT6_DOCUMENT_LINE), defaultCharSet), " "); dbgLog.fine(i + "-th line =" + documentRecord[i] + "<-"); } dbgLog.fine("documentRecord:\n" + StringUtils.join(documentRecord, "\n")); } catch (IOException ex) { //ex.printStackTrace(); throw ex; } dbgLog.fine("decodeRecordType6(): end"); }