List of usage examples for java.util.BitSet
private BitSet(long[] words)
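The constructor shown above is private inside java.util.BitSet, so application code cannot call it directly; the public way to build a BitSet from a long[] word array is the BitSet.valueOf(long[]) factory (Java 7+), with toLongArray() as its inverse. A minimal illustrative sketch (the class name is invented for the example):

import java.util.BitSet;

public class BitSetFromWords {
    public static void main(String[] args) {
        // Bit i is set iff ((words[i / 64] >> (i % 64)) & 1) == 1.
        long[] words = { 0b1011L };              // bits 0, 1 and 3
        BitSet bits = BitSet.valueOf(words);

        System.out.println(bits);                // {0, 1, 3}
        System.out.println(bits.cardinality());  // 3

        // toLongArray() is the inverse (trailing zero words are dropped).
        long[] back = bits.toLongArray();
        System.out.println(back[0] == words[0]); // true
    }
}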
From source file:org.ala.spatial.analysis.layers.SitesBySpeciesTabulated.java
/**
 * Generate and write the sites by species list.
 * <p/>
 * Output files have both .csv and .json
 * decades, tabulation by decades
 * decadecounts, tabulation by (species in) sequential decades
 * bioregionName, tabulation by bioregions (from ssf or grid & gridColumns)
 *
 * @param records         all occurrence records for this density grid as Records.
 * @param outputDirectory path to the output directory.
 * @param region          area restriction, or null for everywhere the occurrences appear, as SimpleRegion.
 * @param envelopeGrid    area restriction as an envelope grid, or null for everywhere the occurrences appear, as Grid
 * @param bioregionName   null or output bioregion name.
 * @param ssf             null or bioregion as shape file with a single column as SimpleRegion.
 * @param grid            null or bioregion as Grid. Must also have gridColumns.
 * @param gridColumns     null or grid bioregion category lookup values as String[].
 * @param decade          true to generate decades and decadecounts output tabulations.
 * @throws IOException
 */
public void write(Records records, String outputDirectory, SimpleRegion region, Grid envelopeGrid,
        String bioregionName, SimpleShapeFile ssf, Grid grid, String[] gridColumns, boolean decade)
        throws IOException {
    String[] columns = null;
    int[] gridIntersections = null;
    int numberOfBioregions = 0;

    // get columns for bioregion categories from ssf or gridColumns.
    if (ssf != null) {
        columns = ssf.getColumnLookup();
    } else if (grid != null) {
        columns = gridColumns;
        gridIntersections = new int[records.getRecordsSize()];
        double[][] points = new double[records.getRecordsSize()][2];
        for (int i = 0; i < records.getRecordsSize(); i++) {
            points[i][0] = records.getLongitude(i);
            points[i][1] = records.getLatitude(i);
        }
        float[] f = grid.getValues(points);
        for (int i = 0; i < f.length; i++) {
            gridIntersections[i] = (int) f[i];
            if (gridIntersections[i] < 0 || gridIntersections[i] >= gridColumns.length + 1) {
                gridIntersections[i] = -1;
            }
        }
        f = null;
        points = null;
    }
    if (columns != null) {
        numberOfBioregions = columns.length + 1;
    }

    int uniqueSpeciesCount = records.getSpeciesSize();

    short[] decadeIdx = getDecadeIdx(records);
    int numberOfDecades = decadeIdx[decadeIdx.length - 1] + 1;

    HashMap<Integer, Integer>[] bioMap = new HashMap[numberOfBioregions];
    HashMap<Integer, Integer>[] decMap = new HashMap[numberOfDecades];
    HashMap<Integer, Integer>[] decCountMap = new HashMap[numberOfDecades + 1];
    for (int i = 0; i < bioMap.length; i++) {
        bioMap[i] = new HashMap<Integer, Integer>();
    }
    for (int i = 0; i < decMap.length; i++) {
        decMap[i] = new HashMap<Integer, Integer>();
    }
    for (int i = 0; i < decCountMap.length; i++) {
        decCountMap[i] = new HashMap<Integer, Integer>();
    }

    records.sortedStarts(bbox[1], bbox[0], resolution);

    BitSet[] bsDecades = new BitSet[numberOfDecades];
    BitSet[] bsBioregions = new BitSet[numberOfBioregions];
    for (int j = 0; j < numberOfBioregions; j++) {
        bsBioregions[j] = new BitSet(uniqueSpeciesCount);
    }
    for (int j = 0; j < numberOfDecades; j++) {
        bsDecades[j] = new BitSet(uniqueSpeciesCount);
    }
    int[] decContinousCounts = new int[records.getSpeciesSize()];

    for (int pos = 0; pos < records.getRecordsSize();) {
        // find end pos
        int x = (int) ((records.getLongitude(pos) - bbox[0]) / resolution);
        int y = (int) ((records.getLatitude(pos) - bbox[1]) / resolution);
        int endPos = pos + 1;
        while (endPos < records.getRecordsSize()
                && x == (int) ((records.getLongitude(endPos) - bbox[0]) / resolution)
                && y == (int) ((records.getLatitude(pos) - bbox[1]) / resolution)) {
            endPos++;
        }

        double longitude = (x + 0.5) * resolution;
        double latitude = (y + 0.5) * resolution;
        if ((region == null || region.isWithin_EPSG900913(longitude, latitude)) && (envelopeGrid == null
                || envelopeGrid.getValues2(new double[][] { { longitude, latitude } })[0] > 0)) {
            // process this cell
            getNextIntArrayRow(records, pos, endPos, bsBioregions, bsDecades, ssf, gridIntersections, decadeIdx);

            for (int j = 0; j < numberOfBioregions; j++) {
                int group = bsBioregions[j].cardinality();
                if (group > 0) {
                    Integer count = bioMap[j].get(group);
                    bioMap[j].put(group, count == null ? 1 : count + 1);
                }
            }
            for (int j = 0; j < numberOfDecades; j++) {
                int group = bsDecades[j].cardinality();
                if (group > 0) {
                    Integer count = decMap[j].get(group);
                    decMap[j].put(group, count == null ? 1 : count + 1);
                }
            }

            // reset
            for (int j = 0; j < decContinousCounts.length; j++) {
                decContinousCounts[j] = 0;
            }
            // sum
            for (int j = 0; j < numberOfDecades; j++) {
                BitSet bs = bsDecades[j];
                if (bs.cardinality() > 0) {
                    for (int k = 0; k < bs.length(); k++) {
                        if (bs.get(k)) {
                            decContinousCounts[k]++;
                        }
                    }
                }
            }
            // count
            java.util.Arrays.sort(decContinousCounts);
            int count = 1;
            for (int j = 1; j < decContinousCounts.length; j++) {
                if (decContinousCounts[j] == decContinousCounts[j - 1]) {
                    count++;
                } else {
                    Integer c = decCountMap[decContinousCounts[j - 1]].get(count);
                    decCountMap[decContinousCounts[j - 1]].put(count, c == null ? 1 : c + 1);
                    count = 1;
                }
            }
            Integer c = decCountMap[decContinousCounts[decContinousCounts.length - 1]].get(count);
            decCountMap[decContinousCounts[decContinousCounts.length - 1]].put(count, c == null ? 1 : c + 1);
        }

        pos = endPos;
    }

    if (numberOfBioregions > 0) {
        writeBioregions(bioregionName, outputDirectory, columns, bioMap);
    }
    writeDecades(outputDirectory, decadeIdx, decMap);
    writeDecadeCounts(outputDirectory, decCountMap);
}
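The BitSet pattern at the heart of the method above is simple: keep one BitSet per group (bioregion or decade), set a bit per species seen in the current grid cell, and read cardinality() to get the distinct-species count before moving on. A stripped-down, self-contained sketch of that idiom, using plain arrays in place of the project-specific Records/Grid classes (names like speciesPerCell are invented for the example):

import java.util.BitSet;
import java.util.HashMap;
import java.util.Map;

public class SpeciesRichnessSketch {
    public static void main(String[] args) {
        int uniqueSpeciesCount = 10;
        // speciesPerCell[cell] holds the (possibly repeated) species indices seen in that cell.
        int[][] speciesPerCell = { { 0, 1, 1, 3 }, { 2, 2 }, { 0, 4, 5, 5, 5 } };

        BitSet seen = new BitSet(uniqueSpeciesCount);
        Map<Integer, Integer> richnessHistogram = new HashMap<>();

        for (int[] cell : speciesPerCell) {
            seen.clear();                       // reuse one BitSet per cell
            for (int speciesIdx : cell) {
                seen.set(speciesIdx);           // duplicates collapse automatically
            }
            int distinct = seen.cardinality();  // number of distinct species in this cell
            richnessHistogram.merge(distinct, 1, Integer::sum);
        }
        System.out.println(richnessHistogram);  // e.g. {1=1, 3=2}
    }
}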
From source file:com.cloudera.oryx.kmeans.computation.cluster.KSketchIndex.java
private static int hammingDistance(BitSet q, BitSet idx) {
    BitSet x = new BitSet(q.size());
    x.or(q);
    x.xor(idx);
    return x.cardinality();
}
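The copy-then-xor sequence keeps the inputs untouched while cardinality() counts the differing bits, which is exactly the Hamming distance. A tiny standalone check of the same idiom; clone() is used here as an equally common way to take the copy:

import java.util.BitSet;

public class HammingSketch {
    static int hammingDistance(BitSet a, BitSet b) {
        BitSet x = (BitSet) a.clone(); // copy so the inputs stay untouched
        x.xor(b);                      // bits set where a and b differ
        return x.cardinality();
    }

    public static void main(String[] args) {
        BitSet a = BitSet.valueOf(new long[] { 0b1010L }); // bits 1, 3
        BitSet b = BitSet.valueOf(new long[] { 0b0110L }); // bits 1, 2
        System.out.println(hammingDistance(a, b));         // 2 (bits 2 and 3 differ)
    }
}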
From source file:org.wso2.andes.kernel.router.TopicRoutingMatcher.java
/**
 * @param constituentIndex The index to create the constituent for
 * @return The created constituent table
 */
private Map<String, BitSet> addConstituentTable(int constituentIndex) {
    Map<String, BitSet> constituentTable = new HashMap<>();

    BitSet nullBitSet = new BitSet(storageQueueList.size());
    BitSet otherBitSet = new BitSet(storageQueueList.size());

    // Fill null and other constituent values for all available queues
    for (int queueIndex = 0; queueIndex < storageQueueList.size(); queueIndex++) {
        String[] constituentsOfQueue = queueConstituents.get(queueIndex);

        if (constituentsOfQueue.length < constituentIndex + 1) {
            // There is no constituent in this queue for this constituent index
            nullBitSet.set(queueIndex);

            // If last constituent of the queue is multiLevelWildCard, then any other is a match
            if (multiLevelWildCard.equals(constituentsOfQueue[constituentsOfQueue.length - 1])) {
                otherBitSet.set(queueIndex);
            }
        } else {
            String queueConstituent = constituentsOfQueue[constituentIndex];

            // Check if this is a wildcard
            if (multiLevelWildCard.equals(queueConstituent)
                    || singleLevelWildCard.equals(queueConstituent)) {
                otherBitSet.set(queueIndex);
            }
        }
    }

    // Add 'null' and 'other' constituent
    constituentTable.put(NULL_CONSTITUENT, nullBitSet);
    constituentTable.put(OTHER_CONSTITUENT, otherBitSet);

    constituentTables.add(constituentIndex, constituentTable);

    return constituentTable;
}
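This and the two Andes variants further down build the same structure: for each constituent position of a topic, a Map<String, BitSet> whose bit q means "queue (or subscription) q can accept this constituent", plus special NULL_CONSTITUENT and OTHER_CONSTITUENT rows for wildcard handling. Routing a key then reduces to ANDing one BitSet per constituent. A reduced sketch of that AND-based lookup, with exact-match constituents only and invented binding data (the wildcard rows are omitted to keep it short):

import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class BitmapRoutingSketch {
    public static void main(String[] args) {
        // Bindings: queue 0 -> "orders.created", queue 1 -> "orders.updated", queue 2 -> "stock.created"
        String[][] bindings = { { "orders", "created" }, { "orders", "updated" }, { "stock", "created" } };
        int constituentCount = 2;

        // One table per constituent position: constituent value -> bit vector of matching queues.
        List<Map<String, BitSet>> tables = new ArrayList<>();
        for (int c = 0; c < constituentCount; c++) {
            Map<String, BitSet> table = new HashMap<>();
            for (int q = 0; q < bindings.length; q++) {
                table.computeIfAbsent(bindings[q][c], k -> new BitSet(bindings.length)).set(q);
            }
            tables.add(table);
        }

        // Route "orders.created": AND the per-constituent vectors together.
        String[] key = { "orders", "created" };
        BitSet matches = new BitSet(bindings.length);
        matches.set(0, bindings.length); // start with every queue as a candidate
        for (int c = 0; c < constituentCount; c++) {
            matches.and(tables.get(c).getOrDefault(key[c], new BitSet()));
        }
        System.out.println(matches); // {0} -> only queue 0 matches
    }
}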
From source file:org.wso2.andes.subscription.TopicSubscriptionBitMapStore.java
/**
 * @param constituentIndex The index to create the constituent for
 * @return The created constituent table
 */
private Map<String, BitSet> addConstituentTable(int constituentIndex) {
    Map<String, BitSet> constituentTable = new HashMap<String, BitSet>();

    BitSet nullBitSet = new BitSet(subscriptionList.size());
    BitSet otherBitSet = new BitSet(subscriptionList.size());

    // Fill null and other constituent values for all available subscriptions
    for (int subscriptionIndex = 0; subscriptionIndex < subscriptionList.size(); subscriptionIndex++) {
        String[] constituentsOfSubscription = subscriptionConstituents.get(subscriptionIndex);

        if (constituentsOfSubscription.length < constituentIndex + 1) {
            // There is no constituent in this subscription for this constituent index
            nullBitSet.set(subscriptionIndex);

            // If last constituent of the subscription is multiLevelWildCard, then any other is a match
            if (multiLevelWildCard.equals(constituentsOfSubscription[constituentsOfSubscription.length - 1])) {
                otherBitSet.set(subscriptionIndex);
            }
        } else {
            String subscriptionConstituent = constituentsOfSubscription[constituentIndex];

            // Check if this is a wildcard
            if (multiLevelWildCard.equals(subscriptionConstituent)
                    || singleLevelWildCard.equals(subscriptionConstituent)) {
                otherBitSet.set(subscriptionIndex);
            }
        }
    }

    // Add 'null' and 'other' constituent
    constituentTable.put(NULL_CONSTITUENT, nullBitSet);
    constituentTable.put(OTHER_CONSTITUENT, otherBitSet);

    constituentTables.add(constituentIndex, constituentTable);

    return constituentTable;
}
From source file:org.apache.hadoop.hive.serde2.compression.SnappyCompDe.java
/**
 * Compress a set of columns.
 *
 * The header contains a compressed array of data types.
 * The body contains compressed columns and their metadata.
 * The footer contains a compressed array of chunk sizes. The final four bytes of the footer
 * encode the byte size of that compressed array.
 *
 * @param colSet
 *
 * @return ByteBuffer representing the compressed set.
 */
@Override
public ByteBuffer compress(ColumnBuffer[] colSet) {

    // Many compression libraries allow you to avoid allocation of intermediate arrays.
    // To use these API, we need to preallocate the output container.

    // Reserve space for the header.
    int[] dataType = new int[colSet.length];
    int maxCompressedSize = Snappy.maxCompressedLength(4 * dataType.length);

    // Reserve space for the compressed nulls BitSet for each column.
    maxCompressedSize += colSet.length * Snappy.maxCompressedLength((colSet.length / 8) + 1);

    // Track the length of `List<Integer> compressedSize` which will be declared later.
    int uncompressedFooterLength = 1 + 2 * colSet.length;

    for (int colNum = 0; colNum < colSet.length; ++colNum) {
        // Reserve space for the compressed columns.
        dataType[colNum] = colSet[colNum].getType().toTType().getValue();

        switch (TTypeId.findByValue(dataType[colNum])) {
        case BOOLEAN_TYPE:
            maxCompressedSize += Integer.SIZE / Byte.SIZE; // This is for the encoded length.
            maxCompressedSize += Snappy.maxCompressedLength((colSet.length / 8) + 1);
            break;
        case TINYINT_TYPE:
            maxCompressedSize += Snappy.maxCompressedLength(colSet.length);
            break;
        case SMALLINT_TYPE:
            maxCompressedSize += Snappy.maxCompressedLength(colSet.length * Short.SIZE / Byte.SIZE);
            break;
        case INT_TYPE:
            maxCompressedSize += Snappy.maxCompressedLength(colSet.length * Integer.SIZE / Byte.SIZE);
            break;
        case BIGINT_TYPE:
            maxCompressedSize += Snappy.maxCompressedLength(colSet.length * Long.SIZE / Byte.SIZE);
            break;
        case DOUBLE_TYPE:
            maxCompressedSize += Snappy.maxCompressedLength(colSet.length * Double.SIZE / Byte.SIZE);
            break;
        case BINARY_TYPE:
            // Reserve space for the size of the compressed array of row sizes.
            maxCompressedSize += Snappy.maxCompressedLength(colSet.length * Integer.SIZE / Byte.SIZE);

            // Reserve space for the size of the compressed flattened bytes.
            for (ByteBuffer nextBuffer : colSet[colNum].toTColumn().getBinaryVal().getValues()) {
                maxCompressedSize += Snappy.maxCompressedLength(nextBuffer.limit());
            }

            // Add an additional value to the list of compressed chunk sizes (length of `rowSize` array).
            uncompressedFooterLength++;
            break;
        case STRING_TYPE:
            // Reserve space for the size of the compressed array of row sizes.
            maxCompressedSize += Snappy.maxCompressedLength(colSet.length * Integer.SIZE / Byte.SIZE);

            // Reserve space for the size of the compressed flattened bytes.
            for (String nextString : colSet[colNum].toTColumn().getStringVal().getValues()) {
                maxCompressedSize += Snappy
                        .maxCompressedLength(nextString.getBytes(StandardCharsets.UTF_8).length);
            }

            // Add an additional value to the list of compressed chunk sizes (length of `rowSize` array).
            uncompressedFooterLength++;
            break;
        default:
            throw new IllegalStateException("Unrecognized column type");
        }
    }

    // Reserve space for the footer.
    maxCompressedSize += Snappy.maxCompressedLength(uncompressedFooterLength * Integer.SIZE / Byte.SIZE);

    // Allocate the output container.
    ByteBuffer output = ByteBuffer.allocate(maxCompressedSize);

    // Allocate the footer. This goes in the footer because we don't know the chunk sizes until after
    // the columns have been compressed and written.
    ArrayList<Integer> compressedSize = new ArrayList<Integer>(uncompressedFooterLength);

    // Write to the output buffer.
    try {
        // Write the header.
        compressedSize.add(writePrimitives(dataType, output));

        // Write the compressed columns and metadata.
        for (int colNum = 0; colNum < colSet.length; colNum++) {
            switch (TTypeId.findByValue(dataType[colNum])) {
            case BOOLEAN_TYPE: {
                TBoolColumn column = colSet[colNum].toTColumn().getBoolVal();

                List<Boolean> bools = column.getValues();
                BitSet bsBools = new BitSet(bools.size());
                for (int rowNum = 0; rowNum < bools.size(); rowNum++) {
                    bsBools.set(rowNum, bools.get(rowNum));
                }

                compressedSize.add(writePrimitives(column.getNulls(), output));

                // BitSet won't write trailing zeroes so we encode the length
                output.putInt(column.getValuesSize());

                compressedSize.add(writePrimitives(bsBools.toByteArray(), output));
                break;
            }
            case TINYINT_TYPE: {
                TByteColumn column = colSet[colNum].toTColumn().getByteVal();
                compressedSize.add(writePrimitives(column.getNulls(), output));
                compressedSize.add(writeBoxedBytes(column.getValues(), output));
                break;
            }
            case SMALLINT_TYPE: {
                TI16Column column = colSet[colNum].toTColumn().getI16Val();
                compressedSize.add(writePrimitives(column.getNulls(), output));
                compressedSize.add(writeBoxedShorts(column.getValues(), output));
                break;
            }
            case INT_TYPE: {
                TI32Column column = colSet[colNum].toTColumn().getI32Val();
                compressedSize.add(writePrimitives(column.getNulls(), output));
                compressedSize.add(writeBoxedIntegers(column.getValues(), output));
                break;
            }
            case BIGINT_TYPE: {
                TI64Column column = colSet[colNum].toTColumn().getI64Val();
                compressedSize.add(writePrimitives(column.getNulls(), output));
                compressedSize.add(writeBoxedLongs(column.getValues(), output));
                break;
            }
            case DOUBLE_TYPE: {
                TDoubleColumn column = colSet[colNum].toTColumn().getDoubleVal();
                compressedSize.add(writePrimitives(column.getNulls(), output));
                compressedSize.add(writeBoxedDoubles(column.getValues(), output));
                break;
            }
            case BINARY_TYPE: {
                TBinaryColumn column = colSet[colNum].toTColumn().getBinaryVal();

                // Initialize the array of row sizes.
                int[] rowSizes = new int[column.getValuesSize()];
                int totalSize = 0;
                for (int rowNum = 0; rowNum < column.getValuesSize(); rowNum++) {
                    rowSizes[rowNum] = column.getValues().get(rowNum).limit();
                    totalSize += column.getValues().get(rowNum).limit();
                }

                // Flatten the data for Snappy for a better compression ratio.
                ByteBuffer flattenedData = ByteBuffer.allocate(totalSize);
                for (int rowNum = 0; rowNum < column.getValuesSize(); rowNum++) {
                    flattenedData.put(column.getValues().get(rowNum));
                }

                // Write nulls bitmap.
                compressedSize.add(writePrimitives(column.getNulls(), output));

                // Write the list of row sizes.
                compressedSize.add(writePrimitives(rowSizes, output));

                // Write the compressed, flattened data.
                compressedSize.add(writePrimitives(flattenedData.array(), output));
                break;
            }
            case STRING_TYPE: {
                TStringColumn column = colSet[colNum].toTColumn().getStringVal();

                // Initialize the array of row sizes.
                int[] rowSizes = new int[column.getValuesSize()];
                int totalSize = 0;
                for (int rowNum = 0; rowNum < column.getValuesSize(); rowNum++) {
                    rowSizes[rowNum] = column.getValues().get(rowNum).length();
                    totalSize += column.getValues().get(rowNum).length();
                }

                // Flatten the data for Snappy for a better compression ratio.
                StringBuilder flattenedData = new StringBuilder(totalSize);
                for (int rowNum = 0; rowNum < column.getValuesSize(); rowNum++) {
                    flattenedData.append(column.getValues().get(rowNum));
                }

                // Write nulls bitmap.
                compressedSize.add(writePrimitives(column.getNulls(), output));

                // Write the list of row sizes.
                compressedSize.add(writePrimitives(rowSizes, output));

                // Write the flattened data.
                compressedSize.add(
                        writePrimitives(flattenedData.toString().getBytes(StandardCharsets.UTF_8), output));
                break;
            }
            default:
                throw new IllegalStateException("Unrecognized column type");
            }
        }

        // Write the footer.
        output.putInt(writeBoxedIntegers(compressedSize, output));

    } catch (IOException e) {
        e.printStackTrace();
    }

    output.flip();
    return output;
}
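One BitSet detail the BOOLEAN_TYPE branch works around: toByteArray() only emits bytes up to the highest set bit, so trailing false rows would be lost unless the row count is written separately (hence output.putInt(column.getValuesSize()) before the packed bits). A minimal round-trip sketch of that pack/unpack step, independent of the Hive/Thrift column types:

import java.util.Arrays;
import java.util.BitSet;

public class BoolColumnPackingSketch {
    public static void main(String[] args) {
        boolean[] rows = { true, false, true, false, false }; // note the trailing falses

        BitSet packed = new BitSet(rows.length);
        for (int i = 0; i < rows.length; i++) {
            packed.set(i, rows[i]);
        }

        byte[] bytes = packed.toByteArray();   // 1 byte; says nothing about rows 3 and 4
        int rowCount = rows.length;            // must be stored alongside the bytes

        // Decode: valueOf() restores the set bits, rowCount restores the true length.
        BitSet unpacked = BitSet.valueOf(bytes);
        boolean[] decoded = new boolean[rowCount];
        for (int i = 0; i < rowCount; i++) {
            decoded[i] = unpacked.get(i);
        }
        System.out.println(Arrays.toString(decoded)); // [true, false, true, false, false]
    }
}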
From source file:flink.iso8583.MessageFactory.java
/**
 * Creates a new message instance from the buffer, which must contain a valid ISO8583
 * message. If the factory is set to use binary messages then it will try to parse
 * a binary message.
 * @param buf The byte buffer containing the message. Must not include the length header.
 * @param isoHeaderLength The expected length of the ISO header, after which the message type
 * and the rest of the message must come.
 */
public IsoMessage parseMessage(byte[] buf, int isoHeaderLength) throws ParseException {
    IsoMessage m = new IsoMessage(isoHeaderLength > 0 ? new String(buf, 0, isoHeaderLength) : null);
    //TODO it only parses ASCII messages for now
    int type = 0;
    if (useBinary) {
        type = ((buf[isoHeaderLength] & 0xff) << 8) | (buf[isoHeaderLength + 1] & 0xff);
    } else {
        type = ((buf[isoHeaderLength] - 48) << 12) | ((buf[isoHeaderLength + 1] - 48) << 8)
                | ((buf[isoHeaderLength + 2] - 48) << 4) | (buf[isoHeaderLength + 3] - 48);
    }
    m.setType(type);
    //Parse the bitmap (primary first)
    BitSet bs = new BitSet(64);
    int pos = 0;
    if (useBinary) {
        for (int i = isoHeaderLength + 2; i < isoHeaderLength + 10; i++) {
            int bit = 128;
            for (int b = 0; b < 8; b++) {
                bs.set(pos++, (buf[i] & bit) != 0);
                bit >>= 1;
            }
        }
        //Check for secondary bitmap and parse if necessary
        if (bs.get(0)) {
            for (int i = isoHeaderLength + 10; i < isoHeaderLength + 18; i++) {
                int bit = 128;
                for (int b = 0; b < 8; b++) {
                    bs.set(pos++, (buf[i] & bit) != 0);
                    bit >>= 1;
                }
            }
            pos = 18 + isoHeaderLength;
        } else {
            pos = 10 + isoHeaderLength;
        }
    } else {
        for (int i = isoHeaderLength + 4; i < isoHeaderLength + 20; i++) {
            int hex = Integer.parseInt(new String(buf, i, 1), 16);
            bs.set(pos++, (hex & 8) > 0);
            bs.set(pos++, (hex & 4) > 0);
            bs.set(pos++, (hex & 2) > 0);
            bs.set(pos++, (hex & 1) > 0);
        }
        //Check for secondary bitmap and parse it if necessary
        if (bs.get(0)) {
            for (int i = isoHeaderLength + 20; i < isoHeaderLength + 36; i++) {
                int hex = Integer.parseInt(new String(buf, i, 1), 16);
                bs.set(pos++, (hex & 8) > 0);
                bs.set(pos++, (hex & 4) > 0);
                bs.set(pos++, (hex & 2) > 0);
                bs.set(pos++, (hex & 1) > 0);
            }
            pos = 36 + isoHeaderLength;
        } else {
            pos = 20 + isoHeaderLength;
        }
    }
    //Parse each field
    Integer itype = new Integer(type);
    Map parseGuide = (Map) parseMap.get(itype);
    List index = (List) parseOrder.get(itype);
    for (Iterator iter = index.iterator(); iter.hasNext();) {
        Integer i = (Integer) iter.next();
        FieldParseInfo fpi = (FieldParseInfo) parseGuide.get(i);
        if (bs.get(i.intValue() - 1)) {
            IsoValue val = useBinary ? fpi.parseBinary(buf, pos) : fpi.parse(buf, pos);
            m.setField(i.intValue(), val);
            if (useBinary && !(val.getType() == IsoType.ALPHA || val.getType() == IsoType.LLVAR
                    || val.getType() == IsoType.LLLVAR)) {
                pos += (val.getLength() / 2) + (val.getLength() % 2);
            } else {
                pos += val.getLength();
            }
            if (val.getType() == IsoType.LLVAR) {
                pos += useBinary ? 1 : 2;
            } else if (val.getType() == IsoType.LLLVAR) {
                pos += useBinary ? 2 : 3;
            }
        }
    }
    return m;
}
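Both ISO 8583 factories in this list (this one and the com.moadbus variant below) decode the message bitmap into a BitSet: in the ASCII case each hex digit contributes four bits, and field i is present when bit i-1 is set, with bit 0 signalling a secondary bitmap. A reduced sketch of just that bitmap step, assuming an ASCII primary bitmap with no secondary bitmap and an invented example value:

import java.util.BitSet;

public class Iso8583BitmapSketch {
    public static void main(String[] args) {
        // 16 hex chars = 64-bit primary bitmap; here fields 2, 3, 4, 7 and 11 are present.
        String primaryBitmap = "7220000000000000";

        BitSet bs = new BitSet(64);
        int pos = 0;
        for (int i = 0; i < primaryBitmap.length(); i++) {
            int hex = Integer.parseInt(primaryBitmap.substring(i, i + 1), 16);
            bs.set(pos++, (hex & 8) > 0);
            bs.set(pos++, (hex & 4) > 0);
            bs.set(pos++, (hex & 2) > 0);
            bs.set(pos++, (hex & 1) > 0);
        }

        // Field i is present when bit i-1 is set; bit 0 would signal a secondary bitmap.
        for (int field = 2; field <= 64; field++) {
            if (bs.get(field - 1)) {
                System.out.println("field " + field + " present");
            }
        }
    }
}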
From source file:Main.java
public static byte[] encodedSeptetsToUnencodedSeptets(byte[] octetBytes, boolean discardLast) {
    byte newBytes[];
    BitSet bitSet;
    int i, j, value1, value2;
    bitSet = new BitSet(octetBytes.length * 8);
    value1 = 0;
    for (i = 0; i < octetBytes.length; i++)
        for (j = 0; j < 8; j++) {
            value1 = (i * 8) + j;
            if ((octetBytes[i] & (1 << j)) != 0)
                bitSet.set(value1);
        }
    value1++;
    // this is a bit count NOT a byte count
    value2 = value1 / 7 + ((value1 % 7 != 0) ? 1 : 0); // big diff here
    //System.out.println(octetBytes.length);
    //System.out.println(value1+" --> "+value2);
    if (value2 == 0)
        value2++;
    newBytes = new byte[value2];
    for (i = 0; i < value2; i++) {
        for (j = 0; j < 7; j++) {
            if ((value1 + 1) > (i * 7 + j)) {
                if (bitSet.get(i * 7 + j)) {
                    newBytes[i] |= (byte) (1 << j);
                }
            }
        }
    }
    if (discardLast && octetBytes.length * 8 % 7 > 0) {
        // when decoding a 7bit encoded string
        // the last septet may become 0, this should be discarded
        // since this is an artifact of the encoding not part of the
        // original string
        // this is only done for decoding 7bit encoded text NOT for
        // reversing octets to septets (e.g. for the encoding the UDH)
        if (newBytes[newBytes.length - 1] == 0) {
            byte[] retVal = new byte[newBytes.length - 1];
            System.arraycopy(newBytes, 0, retVal, 0, retVal.length);
            return retVal;
        }
    }
    return newBytes;
}
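Here the BitSet is used purely as a bit buffer: the octet bits are copied in LSB-first order and then read back seven at a time to rebuild the unpacked septets. A tiny sketch of that pack/unpack idiom for three 7-bit values; the GSM-specific padding and discard rules of the original are left out:

import java.util.BitSet;

public class SeptetBitBufferSketch {
    public static void main(String[] args) {
        int[] septets = { 'G' & 0x7F, 'S' & 0x7F, 'M' & 0x7F };

        // Pack: write each value LSB-first, 7 bits per value.
        BitSet bits = new BitSet(septets.length * 7);
        for (int i = 0; i < septets.length; i++) {
            for (int b = 0; b < 7; b++) {
                if ((septets[i] & (1 << b)) != 0) {
                    bits.set(i * 7 + b);
                }
            }
        }

        // Unpack: read 7 bits per value back out.
        int[] decoded = new int[septets.length];
        for (int i = 0; i < septets.length; i++) {
            for (int b = 0; b < 7; b++) {
                if (bits.get(i * 7 + b)) {
                    decoded[i] |= (1 << b);
                }
            }
        }
        System.out.println((char) decoded[0] + "" + (char) decoded[1] + (char) decoded[2]); // GSM
    }
}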
From source file:org.wso2.andes.subscription.ClusterSubscriptionBitMapHandler.java
/**
 * @param constituentIndex The index to create the constituent for
 * @return The created constituent table
 */
private Map<String, BitSet> addConstituentTable(int constituentIndex) {
    Map<String, BitSet> constituentTable = new HashMap<String, BitSet>();

    BitSet nullBitSet = new BitSet(wildCardSubscriptionList.size());
    BitSet otherBitSet = new BitSet(wildCardSubscriptionList.size());

    // Fill null and other constituent values for all available subscriptions
    for (int subscriptionIndex = 0; subscriptionIndex < wildCardSubscriptionList.size(); subscriptionIndex++) {
        String[] constituentsOfSubscription = subscriptionConstituents.get(subscriptionIndex);

        if (constituentsOfSubscription.length < constituentIndex + 1) {
            // There is no constituent in this subscription for this constituent index
            nullBitSet.set(subscriptionIndex);

            // If last constituent of the subscription is multiLevelWildCard, then any other is a match
            if (multiLevelWildCard.equals(constituentsOfSubscription[constituentsOfSubscription.length - 1])) {
                otherBitSet.set(subscriptionIndex);
            }
        } else {
            String subscriptionConstituent = constituentsOfSubscription[constituentIndex];

            // Check if this is a wildcard
            if (multiLevelWildCard.equals(subscriptionConstituent)
                    || singleLevelWildCard.equals(subscriptionConstituent)) {
                otherBitSet.set(subscriptionIndex);
            }
        }
    }

    // Add 'null' and 'other' constituent
    constituentTable.put(NULL_CONSTITUENT, nullBitSet);
    constituentTable.put(OTHER_CONSTITUENT, otherBitSet);

    constituentTables.add(constituentIndex, constituentTable);

    return constituentTable;
}
From source file:com.moadbus.banking.iso.core.protocol.MessageFactory.java
/**
 * Creates a new message instance from the buffer, which must contain a
 * valid ISO8583 message. If the factory is set to use binary messages then
 * it will try to parse a binary message.
 *
 * @param buf
 *            The byte buffer containing the message. Must not include the
 *            length header.
 * @param isoHeaderLength
 *            The expected length of the ISO header, after which the message
 *            type and the rest of the message must come.
 */
public IsoMessage parseMessage(byte[] buf, int isoHeaderLength) throws ParseException {
    log.debug(" Message length =" + buf.length);
    IsoMessage m = new IsoMessage(isoHeaderLength > 0 ? new String(buf, 0, isoHeaderLength) : null);
    // TODO it only parses ASCII messages for now
    int type = 0;
    if (useBinary) {
        type = ((buf[isoHeaderLength] & 0xff) << 8) | (buf[isoHeaderLength + 1] & 0xff);
    } else {
        type = ((buf[isoHeaderLength] - 48) << 12) | ((buf[isoHeaderLength + 1] - 48) << 8)
                | ((buf[isoHeaderLength + 2] - 48) << 4) | (buf[isoHeaderLength + 3] - 48);
    }
    m.setType(type);
    // Parse the bitmap (primary first)
    BitSet bs = new BitSet(64);
    int pos = 0;
    if (useBinary) {
        for (int i = isoHeaderLength + 2; i < isoHeaderLength + 10; i++) {
            int bit = 128;
            for (int b = 0; b < 8; b++) {
                bs.set(pos++, (buf[i] & bit) != 0);
                bit >>= 1;
            }
        }
        // Check for secondary bitmap and parse if necessary
        if (bs.get(0)) {
            for (int i = isoHeaderLength + 10; i < isoHeaderLength + 18; i++) {
                int bit = 128;
                for (int b = 0; b < 8; b++) {
                    bs.set(pos++, (buf[i] & bit) != 0);
                    bit >>= 1;
                }
            }
            pos = 18 + isoHeaderLength;
        } else {
            pos = 10 + isoHeaderLength;
        }
    } else {
        for (int i = isoHeaderLength + 4; i < isoHeaderLength + 20; i++) {
            int hex = Integer.parseInt(new String(buf, i, 1), 16);
            bs.set(pos++, (hex & 8) > 0);
            bs.set(pos++, (hex & 4) > 0);
            bs.set(pos++, (hex & 2) > 0);
            bs.set(pos++, (hex & 1) > 0);
        }
        // Check for secondary bitmap and parse it if necessary
        if (bs.get(0)) {
            for (int i = isoHeaderLength + 20; i < isoHeaderLength + 36; i++) {
                int hex = Integer.parseInt(new String(buf, i, 1), 16);
                bs.set(pos++, (hex & 8) > 0);
                bs.set(pos++, (hex & 4) > 0);
                bs.set(pos++, (hex & 2) > 0);
                bs.set(pos++, (hex & 1) > 0);
            }
            pos = 36 + isoHeaderLength;
        } else {
            pos = 20 + isoHeaderLength;
        }
    }
    // Parse each field
    Map<Integer, FieldParseInfo> parseGuide = parseMap.get(type);
    List<Integer> index = parseOrder.get(type);
    log.debug(" Parsing bit ");
    log.debug(" Total index =" + index.size());
    for (Integer i : index) {
        FieldParseInfo fpi = parseGuide.get(i);
        if (i == 124) {
            if (1 == 1)
                ; // no-op retained from the original source, apparently a leftover debugging hook
        }
        log.debug((i) + ",");
        if (bs.get(i - 1)) {
            IsoValue val = useBinary ? fpi.parseBinary(buf, pos) : fpi.parse(buf, pos);
            log.debug("bit [" + i + "] len=" + val.getLength() + " val=" + val);
            m.setField(i, val);
            if (useBinary && !(val.getType() == IsoType.ALPHA || val.getType() == IsoType.LLVAR
                    || val.getType() == IsoType.LLLVAR)) {
                pos += (val.getLength() / 2) + (val.getLength() % 2);
            } else {
                pos += val.getLength();
            }
            if (val.getType() == IsoType.LLVAR) {
                pos += useBinary ? 1 : 2;
            } else if (val.getType() == IsoType.LLLVAR) {
                pos += useBinary ? 2 : 3;
            }
        }
    }
    log.debug("...done");
    return m;
}
From source file:hivemall.dataset.LogisticRegressionDataGeneratorUDTF.java
private void generateSparseData() throws HiveException {
    float label = rnd1.nextFloat();
    float sign = (label <= prob_one) ? 1.f : 0.f;
    labels[position] = classification ? sign : label;
    String[] features = featuresArray[position];
    assert (features != null);
    final BitSet used = new BitSet(n_dimensions);
    int searchClearBitsFrom = 0;
    for (int i = 0, retry = 0; i < n_features; i++) {
        int f = rnd2.nextInt(n_dimensions);
        if (used.get(f)) {
            if (retry < 3) {
                --i;
                ++retry;
                continue;
            }
            searchClearBitsFrom = used.nextClearBit(searchClearBitsFrom);
            f = searchClearBitsFrom;
        }
        used.set(f);
        float w = (float) rnd2.nextGaussian() + (sign * eps);
        String y = f + ":" + w;
        features[i] = y;
        retry = 0;
    }
    if (sort) {
        Arrays.sort(features, new Comparator<String>() {
            @Override
            public int compare(String o1, String o2) {
                int i1 = Integer.parseInt(o1.split(":")[0]);
                int i2 = Integer.parseInt(o2.split(":")[0]);
                return Primitives.compare(i1, i2);
            }
        });
    }
}
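The generator uses the BitSet in two ways: get()/set() mark feature indices that are already taken, and after a few failed random draws nextClearBit() falls back to the next unused index, so every sample ends up with distinct features. A small standalone sketch of that sampling-without-replacement idiom, with invented dimension and feature counts:

import java.util.Arrays;
import java.util.BitSet;
import java.util.Random;

public class SampleWithoutReplacementSketch {
    public static void main(String[] args) {
        final int nDimensions = 20;
        final int nFeatures = 8;
        final Random rnd = new Random(42);

        BitSet used = new BitSet(nDimensions);
        int[] chosen = new int[nFeatures];
        int searchClearBitsFrom = 0;

        for (int i = 0, retry = 0; i < nFeatures; i++) {
            int f = rnd.nextInt(nDimensions);
            if (used.get(f)) {
                if (retry < 3) {
                    --i;            // retry another random index a few times
                    ++retry;
                    continue;
                }
                // Too many collisions: fall back to the next unused index.
                searchClearBitsFrom = used.nextClearBit(searchClearBitsFrom);
                f = searchClearBitsFrom;
            }
            used.set(f);
            chosen[i] = f;
            retry = 0;
        }
        System.out.println(Arrays.toString(chosen)); // 8 distinct indices in [0, 20)
    }
}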