List of usage examples for java.util.BitSet.get
public boolean get(int bitIndex)
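Before the project-level examples below, a minimal standalone sketch of the method itself (hypothetical values, not taken from any of the source files in this list). get(bitIndex) simply reports whether that bit is currently set; indexes beyond the highest set bit return false rather than throwing.

import java.util.BitSet;

public class BitSetGetDemo {
    public static void main(String[] args) {
        BitSet bits = new BitSet();
        bits.set(3);
        bits.set(64); // the BitSet grows automatically past its initial size

        System.out.println(bits.get(3));   // true
        System.out.println(bits.get(4));   // false
        System.out.println(bits.get(200)); // false: beyond the highest set bit, get() just returns false
    }
}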
From source file:com.dianping.puma.parser.mysql.event.AbstractRowsEvent.java
/**
 * @see http://code.google.com/p/open-replicator/
 * @param buf
 * @param usedColumns
 * @return
 * @throws IOException
 */
protected Row parseRow(ByteBuffer buf, BitSet usedColumns) throws IOException {
    int unusedColumnCount = 0;
    byte[] types = tableMapEvent.getColumnTypes();
    Metadata metadata = tableMapEvent.getColumnMetadata();
    BitSet nullColumns = PacketUtils.readBitSet(buf, types.length);
    List<Column> columns = new ArrayList<Column>(types.length);
    for (int i = 0; i < types.length; ++i) {
        int length = 0;
        int meta = metadata.getMetadata(i);
        int type = CodecUtils.toUnsigned(types[i]);
        if (type == BinlogConstants.MYSQL_TYPE_STRING && meta > 256) {
            int meta0 = meta >> 8;
            int meta1 = meta & 0xFF;
            if ((meta0 & 0x30) != 0x30) {
                type = meta0 | 0x30;
                length = meta1 | (((meta0 & 0x30) ^ 0x30) << 4);
            } else {
                switch (meta0) {
                    case BinlogConstants.MYSQL_TYPE_SET:
                    case BinlogConstants.MYSQL_TYPE_ENUM:
                    case BinlogConstants.MYSQL_TYPE_STRING:
                        type = meta0;
                        length = meta1;
                        break;
                    default:
                        throw new NestableRuntimeException("assertion failed, unknown column type: " + type);
                }
            }
        }
        if (!usedColumns.get(i)) {
            unusedColumnCount++;
            continue;
        } else if (nullColumns.get(i - unusedColumnCount)) {
            columns.add(NullColumn.valueOf(type));
            continue;
        }
        int value = 0;
        switch (type) {
            case BinlogConstants.MYSQL_TYPE_TINY:
                value = PacketUtils.readInt(buf, 1);
                value = (value << 24) >> 24;
                columns.add(TinyColumn.valueOf(value));
                break;
            case BinlogConstants.MYSQL_TYPE_SHORT:
                value = PacketUtils.readInt(buf, 2);
                value = (value << 16) >> 16;
                columns.add(ShortColumn.valueOf(value));
                break;
            case BinlogConstants.MYSQL_TYPE_INT24:
                value = PacketUtils.readInt(buf, 3);
                value = (value << 8) >> 8;
                columns.add(Int24Column.valueOf(value));
                break;
            case BinlogConstants.MYSQL_TYPE_INT:
                columns.add(IntColumn.valueOf(PacketUtils.readInt(buf, 4)));
                break;
            case BinlogConstants.MYSQL_TYPE_LONGLONG:
                columns.add(LongLongColumn.valueOf(PacketUtils.readLong(buf, 8)));
                break;
            case BinlogConstants.MYSQL_TYPE_FLOAT:
                columns.add(FloatColumn.valueOf(Float.intBitsToFloat(PacketUtils.readInt(buf, 4))));
                break;
            case BinlogConstants.MYSQL_TYPE_DOUBLE:
                columns.add(DoubleColumn.valueOf(Double.longBitsToDouble(PacketUtils.readLong(buf, 8))));
                break;
            case BinlogConstants.MYSQL_TYPE_YEAR:
                columns.add(YearColumn.valueOf(MySQLUtils.toYear((short) PacketUtils.readInt(buf, 1))));
                break;
            case BinlogConstants.MYSQL_TYPE_DATE:
                columns.add(DateColumn.valueOf(MySQLUtils.toDate(PacketUtils.readInt(buf, 3))));
                break;
            case BinlogConstants.MYSQL_TYPE_TIME:
                columns.add(TimeColumn.valueOf(MySQLUtils.toTime(PacketUtils.readInt(buf, 3))));
                break;
            case BinlogConstants.MYSQL_TYPE_TIMESTAMP:
                columns.add(TimestampColumn.valueOf(PacketUtils.readLong(buf, 4)));
                break;
            case BinlogConstants.MYSQL_TYPE_DATETIME:
                columns.add(DatetimeColumn.valueOf(MySQLUtils.toDatetime(PacketUtils.readLong(buf, 8))));
                break;
            case BinlogConstants.MYSQL_TYPE_ENUM:
                columns.add(EnumColumn.valueOf(PacketUtils.readInt(buf, length)));
                break;
            case BinlogConstants.MYSQL_TYPE_SET:
                columns.add(SetColumn.valueOf(PacketUtils.readLong(buf, length)));
                break;
            case BinlogConstants.MYSQL_TYPE_STRING:
                final int stringLength = length < 256 ? PacketUtils.readInt(buf, 1) : PacketUtils.readInt(buf, 2);
                columns.add(StringColumn.valueOf(PacketUtils.readBytes(buf, stringLength)));
                break;
            case BinlogConstants.MYSQL_TYPE_BIT:
                final int bitLength = (meta >> 8) * 8 + (meta & 0xFF);
                columns.add(BitColumn.valueOf(bitLength, PacketUtils.readBit(buf, bitLength, false)));
                break;
            case BinlogConstants.MYSQL_TYPE_NEWDECIMAL:
                final int precision = meta & 0xFF;
                final int scale = meta >> 8;
                final int decimalLength = MySQLUtils.getDecimalBinarySize(precision, scale);
                columns.add(DecimalColumn.valueOf(
                        MySQLUtils.toDecimal(precision, scale, PacketUtils.readBytes(buf, decimalLength)),
                        precision, scale));
                break;
            case BinlogConstants.MYSQL_TYPE_BLOB:
                final int blobLength = PacketUtils.readInt(buf, meta);
                columns.add(BlobColumn.valueOf(PacketUtils.readBytes(buf, blobLength)));
                break;
            case BinlogConstants.MYSQL_TYPE_VARCHAR:
            case BinlogConstants.MYSQL_TYPE_VAR_STRING:
                final int varcharLength = meta < 256 ? PacketUtils.readInt(buf, 1) : PacketUtils.readInt(buf, 2);
                columns.add(StringColumn.valueOf(PacketUtils.readBytes(buf, varcharLength)));
                break;
            case BinlogConstants.MYSQL_TYPE_TIME2:
                final int timeValue = PacketUtils.readInt(buf, 3, false);
                final int timeNanos = PacketUtils.readInt(buf, (meta + 1) / 2, false);
                columns.add(Time2Column.valueOf(MySQLUtils.toTime2(timeValue, timeNanos, meta)));
                break;
            case BinlogConstants.MYSQL_TYPE_DATETIME2:
                final long dateTimeValue = PacketUtils.readLong(buf, 5, false);
                final int dateTimeNanos = PacketUtils.readInt(buf, (meta + 1) / 2, false);
                columns.add(Datetime2Column.valueOf(MySQLUtils.toDatetime2(dateTimeValue, dateTimeNanos, meta)));
                break;
            case BinlogConstants.MYSQL_TYPE_TIMESTAMP2:
                final long timeStampValue = PacketUtils.readLong(buf, 4, false);
                final int timeStampNanos = PacketUtils.readInt(buf, (meta + 1) / 2, false);
                columns.add(Timestamp2Column.valueOf(MySQLUtils.toTimestamp2(timeStampValue, timeStampNanos, meta)));
                break;
            default:
                throw new NestableRuntimeException("assertion failed, unknown column type: " + type);
        }
    }
    return new Row(columns);
}
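The interesting BitSet detail above is the index arithmetic: the null bitmap read from the packet covers only the columns that are actually transmitted, so it is queried with i - unusedColumnCount rather than i. A standalone sketch of that indexing idiom with plain java.util.BitSet and a made-up five-column layout:

import java.util.BitSet;

public class NullBitmapIndexing {
    public static void main(String[] args) {
        int columnCount = 5;

        // Columns 0, 2 and 4 are transmitted; 1 and 3 are filtered out.
        BitSet usedColumns = new BitSet(columnCount);
        usedColumns.set(0);
        usedColumns.set(2);
        usedColumns.set(4);

        // The null bitmap is indexed over the *used* columns only:
        // bit 0 -> column 0, bit 1 -> column 2, bit 2 -> column 4.
        BitSet nullColumns = new BitSet();
        nullColumns.set(1); // column 2 is NULL in this row

        int unusedColumnCount = 0;
        for (int i = 0; i < columnCount; i++) {
            if (!usedColumns.get(i)) {
                unusedColumnCount++;
                continue;
            }
            boolean isNull = nullColumns.get(i - unusedColumnCount);
            System.out.println("column " + i + (isNull ? " is NULL" : " has a value"));
        }
    }
}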
From source file:ArrayUtils.java
/**
 * Removes all contents of <code>array2</code> from <code>array1</code>. All
 * instances of <code>array2</code> will also be removed from <code>array1</code>.
 *
 * @param <T>    The type of the array
 * @param array1 The array to remove elements from
 * @param array2 The array containing the elements to remove; or the element to remove itself
 * @return <code>array1</code> missing all the contents of <code>array2</code>
 */
public static <T> T[] removeAll(T[] array1, Object array2) {
    if (array1 == null || array2 == null)
        return array1;
    if (!array1.getClass().isArray())
        return null;
    if (!array2.getClass().isArray())
        array2 = new Object[] { array2 };
    java.util.BitSet remove = new java.util.BitSet();
    int len1 = array1.length;
    int len2 = Array.getLength(array2);
    int i, j;
    for (i = 0; i < len1; i++) {
        for (j = 0; j < len2; j++) {
            if (equals(array1[i], Array.get(array2, j))) {
                remove.set(i);
                break;
            }
        }
    }
    T[] ret = (T[]) Array.newInstance(array1.getClass().getComponentType(), len1 - remove.cardinality());
    // This copying section might be replaced by a more efficient version
    // using System.arraycopy()--this would be much faster than reflection,
    // especially for large arrays needing only a few elements removed
    for (i = 0, j = 0; i < len1; i++) {
        if (!remove.get(i)) {
            ret[j] = array1[i];
            j++;
        }
    }
    return ret;
}
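A possible call site for the method above, sketched under the assumption that the surrounding ArrayUtils class (including its own equals helper, which is taken here to mean value equality) is on the classpath. The BitSet marks the indexes to drop before the result array is sized.

import java.util.Arrays;

public class RemoveAllDemo {
    public static void main(String[] args) {
        Integer[] source = { 1, 2, 3, 2, 4 };

        // Remove every occurrence of the values in the second array.
        Integer[] pruned = ArrayUtils.removeAll(source, new Integer[] { 2, 4 });
        System.out.println(Arrays.toString(pruned)); // [1, 3]

        // A single non-array argument removes just that one value.
        System.out.println(Arrays.toString(ArrayUtils.removeAll(source, 3))); // [1, 2, 2, 4]
    }
}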
From source file:ArrayUtils.java
/**
 * Removes all contents of <code>array2</code> from <code>array1</code>. All
 * instances of <code>array2</code> will also be removed from <code>array1</code>.
 * For primitive types.
 *
 * @param array1 The array to remove elements from
 * @param array2 The array containing the elements to remove; or the element to remove itself
 * @return <code>array1</code> missing all the contents of <code>array2</code>
 */
public static Object removeAllP(Object array1, Object array2) {
    if (array1 == null || array2 == null)
        return array1;
    if (!array1.getClass().isArray())
        return null;
    if (!array2.getClass().isArray())
        array2 = new Object[] { array2 };
    else
        array2 = addP(array2, array2);
    java.util.BitSet remove = new java.util.BitSet();
    int len1 = Array.getLength(array1);
    int len2 = Array.getLength(array2);
    int i, j;
    for (i = 0; i < len1; i++) {
        for (j = 0; j < len2; j++) {
            if (equals(Array.get(array1, i), Array.get(array2, j))) {
                remove.set(i);
                break;
            }
        }
    }
    Object ret = Array.newInstance(array1.getClass().getComponentType(), len1 - remove.cardinality());
    // This copying section might be replaced by a more efficient version
    // using System.arraycopy()--this would be much faster than reflection,
    // especially for large arrays needing only a few elements removed
    for (i = 0, j = 0; i < len1; i++) {
        if (!remove.get(i)) {
            put(ret, Array.get(array1, i), j);
            j++;
        }
    }
    return ret;
}
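A sketch of calling the primitive-array variant, again assuming the ArrayUtils class above (with its addP, put and equals helpers) is available and that its equals helper compares boxed values by value. The caller casts the reflectively built result back to the primitive array type.

public class RemoveAllPrimitiveDemo {
    public static void main(String[] args) {
        int[] source = { 1, 2, 3, 2, 4 };

        // The reflective variant accepts primitive arrays; the result must be cast back.
        int[] pruned = (int[]) ArrayUtils.removeAllP(source, new int[] { 2, 4 });
        System.out.println(java.util.Arrays.toString(pruned)); // [1, 3]
    }
}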
From source file:de.unijena.bioinf.FragmentationTreeConstruction.computation.FragmentationPatternAnalysis.java
/**
 * Step 3. Normalizing
 * Merge all peaks within a single spectrum
 * Return a list of peaks (from all spectra) with relative intensities
 */
public ProcessedInput performNormalization(ProcessedInput input) {
    final Ms2Experiment experiment = input.getExperimentInformation();
    final double parentMass = experiment.getIonMass();
    final ArrayList<ProcessedPeak> peaklist = new ArrayList<ProcessedPeak>(100);
    final Deviation mergeWindow = getDefaultProfile().getAllowedMassDeviation().divide(2d);
    final Ionization ion = experiment.getPrecursorIonType().getIonization();
    double globalMaxIntensity = 0d;
    for (Ms2Spectrum s : experiment.getMs2Spectra()) {
        // merge peaks: iterate them from highest to lowest intensity and remove peaks which
        // are in the mass range of a high intensive peak
        final MutableSpectrum<Peak> sortedByIntensity = new SimpleMutableSpectrum(s);
        Spectrums.sortSpectrumByDescendingIntensity(sortedByIntensity);
        // simple spectra are always ordered by mass
        final SimpleSpectrum sortedByMass = new SimpleSpectrum(s);
        final BitSet deletedPeaks = new BitSet(s.size());
        for (int i = 0; i < s.size(); ++i) {
            // get index of peak in mass-ordered spectrum
            final double mz = sortedByIntensity.getMzAt(i);
            final int index = Spectrums.binarySearch(sortedByMass, mz);
            assert index >= 0;
            if (deletedPeaks.get(index))
                continue; // peak is already deleted
            // delete all peaks within the mass range
            for (int j = index - 1; j >= 0 && mergeWindow.inErrorWindow(mz, sortedByMass.getMzAt(j)); --j)
                deletedPeaks.set(j, true);
            for (int j = index + 1; j < s.size() && mergeWindow.inErrorWindow(mz, sortedByMass.getMzAt(j)); ++j)
                deletedPeaks.set(j, true);
        }
        final int offset = peaklist.size();
        // add all remaining peaks to the peaklist
        for (int i = 0; i < s.size(); ++i) {
            if (!deletedPeaks.get(i)) {
                final ProcessedPeak propeak = new ProcessedPeak(
                        new MS2Peak(s, sortedByMass.getMzAt(i), sortedByMass.getIntensityAt(i)));
                propeak.setIon(ion);
                peaklist.add(propeak);
            }
        }
        // now performNormalization spectrum. Ignore peaks near to the parent peak
        final double lowerbound = parentMass - 0.1d;
        double scale = 0d;
        for (int i = offset; i < peaklist.size() && peaklist.get(i).getMz() < lowerbound; ++i) {
            scale = Math.max(scale, peaklist.get(i).getIntensity());
        }
        if (scale == 0)
            scale = peaklist.get(0).getIntensity(); // happens for spectra with only one peak
        // now set local relative intensities
        for (int i = offset; i < peaklist.size(); ++i) {
            final ProcessedPeak peak = peaklist.get(i);
            peak.setLocalRelativeIntensity(peak.getIntensity() / scale);
        }
        // and adjust global relative intensity
        globalMaxIntensity = Math.max(globalMaxIntensity, scale);
    }
    // now calculate global normalized intensities
    for (ProcessedPeak peak : peaklist) {
        peak.setGlobalRelativeIntensity(peak.getIntensity() / globalMaxIntensity);
        peak.setRelativeIntensity(normalizationType == NormalizationType.GLOBAL
                ? peak.getGlobalRelativeIntensity()
                : peak.getLocalRelativeIntensity());
    }
    // finished!
    input.setMergedPeaks(peaklist);
    // postprocess
    postProcess(PostProcessor.Stage.AFTER_NORMALIZING, input);
    return input;
}
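The merge step above never mutates the spectrum while iterating; it only flags indexes in a BitSet and then copies the survivors. A self-contained sketch of that delete-by-flag idiom with a plain sorted double[] and a fixed tolerance (made-up numbers; the real code iterates by descending intensity, whereas here the earlier value simply wins, which is enough to show the BitSet mechanics):

import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;

public class MergeByFlag {
    public static void main(String[] args) {
        double[] masses = { 100.000, 100.004, 150.0, 150.003, 200.0 }; // sorted ascending
        double tolerance = 0.01;

        BitSet deleted = new BitSet(masses.length);
        for (int i = 0; i < masses.length; i++) {
            if (deleted.get(i)) continue; // already swallowed by an earlier value
            // flag every later value that falls inside the tolerance window
            for (int j = i + 1; j < masses.length && masses[j] - masses[i] <= tolerance; j++) {
                deleted.set(j);
            }
        }

        List<Double> merged = new ArrayList<>();
        for (int i = 0; i < masses.length; i++) {
            if (!deleted.get(i)) merged.add(masses[i]);
        }
        System.out.println(merged); // [100.0, 150.0, 200.0]
    }
}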
From source file:com.joliciel.jochre.graphics.ShapeImpl.java
@Override
public boolean isPixelBlack(int x, int y, int threshold, int whiteGapFillFactor) {
    if (x < 0 || y < 0 || x >= this.getWidth() || y >= this.getHeight())
        return false;
    BitSet bitset = this.getBlackAndWhiteBitSet(threshold, whiteGapFillFactor);
    return bitset.get(y * this.getWidth() + x);
}
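The call above encodes an (x, y) pixel as a single bit index, y * width + x. A standalone sketch of that row-major mapping with a plain BitSet and a made-up 4x3 image:

import java.util.BitSet;

public class PixelBitSet {
    public static void main(String[] args) {
        int width = 4, height = 3;
        BitSet pixels = new BitSet(width * height);

        // Mark pixel (x=2, y=1) as black.
        pixels.set(1 * width + 2);

        System.out.println(pixels.get(1 * width + 2)); // true
        System.out.println(pixels.get(0));             // false
        // Bounds checks stay the caller's responsibility, exactly as in isPixelBlack above.
    }
}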
From source file:org.apache.hadoop.mapreduce.lib.input.TestMRKeyValueTextInputFormat.java
@Test
public void testSplitableCodecs() throws Exception {
    final Job job = Job.getInstance(defaultConf);
    final Configuration conf = job.getConfiguration();

    // Create the codec
    CompressionCodec codec = null;
    try {
        codec = (CompressionCodec) ReflectionUtils
                .newInstance(conf.getClassByName("org.apache.hadoop.io.compress.BZip2Codec"), conf);
    } catch (ClassNotFoundException cnfe) {
        throw new IOException("Illegal codec!");
    }
    Path file = new Path(workDir, "test" + codec.getDefaultExtension());

    int seed = new Random().nextInt();
    LOG.info("seed = " + seed);
    Random random = new Random(seed);

    localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, workDir);

    final int MAX_LENGTH = 500000;
    FileInputFormat.setMaxInputSplitSize(job, MAX_LENGTH / 20);

    // for a variety of lengths
    for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 4) + 1) {
        LOG.info("creating; entries = " + length);

        // create a file with length entries
        Writer writer = new OutputStreamWriter(codec.createOutputStream(localFs.create(file)));
        try {
            for (int i = 0; i < length; i++) {
                writer.write(Integer.toString(i * 2));
                writer.write("\t");
                writer.write(Integer.toString(i));
                writer.write("\n");
            }
        } finally {
            writer.close();
        }

        // try splitting the file in a variety of sizes
        KeyValueTextInputFormat format = new KeyValueTextInputFormat();
        assertTrue("KVTIF claims not splittable", format.isSplitable(job, file));
        for (int i = 0; i < 3; i++) {
            int numSplits = random.nextInt(MAX_LENGTH / 2000) + 1;
            LOG.info("splitting: requesting = " + numSplits);
            List<InputSplit> splits = format.getSplits(job);
            LOG.info("splitting: got = " + splits.size());

            // check each split
            BitSet bits = new BitSet(length);
            for (int j = 0; j < splits.size(); j++) {
                LOG.debug("split[" + j + "]= " + splits.get(j));
                TaskAttemptContext context = MapReduceTestUtil
                        .createDummyMapTaskAttemptContext(job.getConfiguration());
                RecordReader<Text, Text> reader = format.createRecordReader(splits.get(j), context);
                Class<?> clazz = reader.getClass();
                MapContext<Text, Text, Text, Text> mcontext = new MapContextImpl<Text, Text, Text, Text>(
                        job.getConfiguration(), context.getTaskAttemptID(), reader, null, null,
                        MapReduceTestUtil.createDummyReporter(), splits.get(j));
                reader.initialize(splits.get(j), mcontext);
                Text key = null;
                Text value = null;
                try {
                    int count = 0;
                    while (reader.nextKeyValue()) {
                        key = reader.getCurrentKey();
                        value = reader.getCurrentValue();
                        final int k = Integer.parseInt(key.toString());
                        final int v = Integer.parseInt(value.toString());
                        assertEquals("Bad key", 0, k % 2);
                        assertEquals("Mismatched key/value", k / 2, v);
                        LOG.debug("read " + k + "," + v);
                        assertFalse(k + "," + v + " in multiple partitions.", bits.get(v));
                        bits.set(v);
                        count++;
                    }
                    if (count > 0) {
                        LOG.info("splits[" + j + "]=" + splits.get(j) + " count=" + count);
                    } else {
                        LOG.debug("splits[" + j + "]=" + splits.get(j) + " count=" + count);
                    }
                } finally {
                    reader.close();
                }
            }
            assertEquals("Some keys in no partition.", length, bits.cardinality());
        }
    }
}
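Stripped of the Hadoop plumbing, the BitSet in this test does two jobs: bits.get(v) catches a record that shows up in more than one split, and cardinality() proves every record was seen exactly once. A minimal standalone version of that bookkeeping with hypothetical record ids:

import java.util.BitSet;

public class CoverageCheck {
    public static void main(String[] args) {
        int expectedRecords = 5;
        BitSet seen = new BitSet(expectedRecords);

        // Pretend these record ids arrive from two different splits.
        int[][] splits = { { 0, 1, 2 }, { 3, 4 } };
        for (int[] split : splits) {
            for (int id : split) {
                if (seen.get(id)) {
                    throw new IllegalStateException(id + " appeared in multiple partitions");
                }
                seen.set(id);
            }
        }

        // No duplicate above plus full cardinality here means every id was seen exactly once.
        if (seen.cardinality() == expectedRecords) {
            System.out.println("every record was read exactly once");
        } else {
            System.out.println("records missing: " + (expectedRecords - seen.cardinality()));
        }
    }
}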
From source file:net.sf.extjwnl.princeton.file.PrincetonRandomAccessDictionaryFile.java
private String renderSynset(Synset synset) {
    int estLength = offsetLength + 1 //offset
            + 2 + 1 //lexfilenum
            + 1 //ss_type
            + offsetLength + 1 //w_cnt
            + (10 + 3 + 1) * synset.getWords().size() //avg word 10 chars + lex_id max 3 chars
            + offsetLength + 1 //p_cnt
            + (1 + 1 + offsetLength + 1 + 1 + 1 + 4 + 1) * synset.getPointers().size()
            + synset.getGloss().length() + 2 + 2;
    if (POS.VERB == synset.getPOS()) {
        estLength = estLength + 8 * synset.getWords().size(); //8 for verb flag, about one per word
    }

    //synset_offset lex_filenum ss_type w_cnt word lex_id [word lex_id...] p_cnt [ptr...] [frames...] | gloss
    //w_cnt Two digit hexadecimal integer indicating the number of words in the synset.
    String posKey = synset.getPOS().getKey();
    if (POS.ADJECTIVE == synset.getPOS() && synset.isAdjectiveCluster()) {
        posKey = POS.ADJECTIVE_SATELLITE_KEY;
    }
    if (checkLexFileNumber && log.isWarnEnabled()
            && !LexFileIdFileNameMap.getMap().containsKey(synset.getLexFileNum())) {
        log.warn(JWNL.resolveMessage("PRINCETON_WARN_001", synset.getLexFileNum()));
    }
    if (checkWordCountLimit && log.isWarnEnabled() && (0xFF < synset.getWords().size())) {
        log.warn(JWNL.resolveMessage("PRINCETON_WARN_004",
                new Object[] { synset.getOffset(), synset.getWords().size() }));
    }
    StringBuilder result = new StringBuilder(estLength);
    formatOffset(synset.getOffset(), offsetLength, result);
    if (synset.getLexFileNum() < 10) {
        result.append(" 0").append(synset.getLexFileNum());
    } else {
        result.append(" ").append(synset.getLexFileNum());
    }
    result.append(" ").append(posKey);
    if (synset.getWords().size() < 0x10) {
        result.append(" 0").append(Integer.toHexString(synset.getWords().size())).append(" ");
    } else {
        result.append(" ").append(Integer.toHexString(synset.getWords().size())).append(" ");
    }
    for (Word w : synset.getWords()) {
        //ASCII form of a word as entered in the synset by the lexicographer, with spaces replaced by
        //underscore characters (_). The text of the word is case sensitive.
        //lex_id One digit hexadecimal integer that, when appended onto lemma, uniquely identifies a sense
        //within a lexicographer file.
        String lemma = w.getLemma().replace(' ', '_');
        if (w instanceof Adjective) {
            Adjective a = (Adjective) w;
            if (AdjectivePosition.NONE != a.getAdjectivePosition()) {
                lemma = lemma + "(" + a.getAdjectivePosition().getKey() + ")";
            }
        }
        if (checkLexIdLimit && log.isWarnEnabled() && (0xF < w.getLexId())) {
            log.warn(JWNL.resolveMessage("PRINCETON_WARN_005",
                    new Object[] { synset.getOffset(), w.getLemma(), w.getLexId() }));
        }
        result.append(lemma).append(" ");
        result.append(Long.toHexString(w.getLexId())).append(" ");
    }
    //Three digit decimal integer indicating the number of pointers from this synset to other synsets.
    //If p_cnt is 000 the synset has no pointers.
    if (checkRelationLimit && log.isWarnEnabled() && (999 < synset.getPointers().size())) {
        log.warn(JWNL.resolveMessage("PRINCETON_WARN_002",
                new Object[] { synset.getOffset(), synset.getPointers().size() }));
    }
    if (synset.getPointers().size() < 100) {
        result.append("0");
        if (synset.getPointers().size() < 10) {
            result.append("0");
        }
    }
    result.append(synset.getPointers().size()).append(" ");
    for (Pointer p : synset.getPointers()) {
        //pointer_symbol synset_offset pos source/target
        result.append(p.getType().getKey()).append(" ");
        //synset_offset is the byte offset of the target synset in the data file corresponding to pos
        formatOffset(p.getTargetOffset(), offsetLength, result);
        result.append(" ");
        //pos
        result.append(p.getTargetPOS().getKey()).append(" ");
        //source/target
        //The source/target field distinguishes lexical and semantic pointers.
        //It is a four byte field, containing two two-digit hexadecimal integers.
        //The first two digits indicate the word number in the current (source) synset,
        //the last two digits indicate the word number in the target synset.
        //A value of 0000 means that pointer_symbol represents a semantic relation between the current
        //(source) synset and the target synset indicated by synset_offset.
        //A lexical relation between two words in different synsets is represented by non-zero values in
        //the source and target word numbers. The first and last two bytes of this field indicate the word
        //numbers in the source and target synsets, respectively, between which the relation holds.
        //Word numbers are assigned to the word fields in a synset, from left to right, beginning with 1.
        if (checkPointerIndexLimit && log.isWarnEnabled() && (0xFF < p.getSourceIndex())) {
            log.warn(JWNL.resolveMessage("PRINCETON_WARN_006", new Object[] { synset.getOffset(),
                    p.getSource().getSynset().getOffset(), p.getSourceIndex() }));
        }
        if (checkPointerIndexLimit && log.isWarnEnabled() && (0xFF < p.getTargetIndex())) {
            log.warn(JWNL.resolveMessage("PRINCETON_WARN_006", new Object[] { synset.getOffset(),
                    p.getTarget().getSynset().getOffset(), p.getTargetIndex() }));
        }
        if (p.getSourceIndex() < 0x10) {
            result.append("0");
        }
        result.append(Integer.toHexString(p.getSourceIndex()));
        if (p.getTargetIndex() < 0x10) {
            result.append("0");
        }
        result.append(Integer.toHexString(p.getTargetIndex())).append(" ");
    }
    //frames In data.verb only
    if (POS.VERB == synset.getPOS()) {
        BitSet verbFrames = synset.getVerbFrameFlags();
        int verbFramesCount = verbFrames.cardinality();
        for (Word word : synset.getWords()) {
            if (word instanceof Verb) {
                BitSet bits = ((Verb) word).getVerbFrameFlags();
                for (int i = bits.nextSetBit(0); i >= 0; i = bits.nextSetBit(i + 1)) {
                    //WN TRICK - there are duplicates in data
                    //02593551 41 v 04 lord_it_over 0 queen_it_over 0 put_on_airs 0 act_superior 0 001 @ 02367363 v 0000
                    // 09 + 02 00 + 02 04 + 22 04 + 02 03 + 22 03 + 08 02 + 09 02 + 08 01 + 09 01 | act like the master of; "He is lording it over the students"
                    // + 02 04 and + 02 03 duplicate + 02 00
                    // it is the only one, but it causes offsets to differ on WN30 rewrite
                    if (!verbFrames.get(i)) {
                        verbFramesCount++;
                    }
                }
            }
        }
        if (checkVerbFrameLimit && log.isWarnEnabled() && (99 < verbFramesCount)) {
            log.warn(JWNL.resolveMessage("PRINCETON_WARN_007",
                    new Object[] { synset.getOffset(), verbFramesCount }));
        }
        if (verbFramesCount < 10) {
            result.append("0");
        }
        result.append(Integer.toString(verbFramesCount)).append(" ");
        for (int i = verbFrames.nextSetBit(0); i >= 0; i = verbFrames.nextSetBit(i + 1)) {
            if (checkVerbFrameLimit && log.isWarnEnabled() && (99 < i)) {
                log.warn(JWNL.resolveMessage("PRINCETON_WARN_008", new Object[] { synset.getOffset(), i }));
            }
            result.append("+ ");
            if (i < 10) {
                result.append("0");
            }
            result.append(Integer.toString(i));
            result.append(" 00 ");
        }
        for (Word word : synset.getWords()) {
            if (word instanceof Verb) {
                BitSet bits = ((Verb) word).getVerbFrameFlags();
                for (int i = bits.nextSetBit(0); i >= 0; i = bits.nextSetBit(i + 1)) {
                    if (!verbFrames.get(i)) {
                        if (checkVerbFrameLimit && log.isWarnEnabled() && (0xFF < word.getIndex())) {
                            log.warn(JWNL.resolveMessage("PRINCETON_WARN_008",
                                    new Object[] { synset.getOffset(), word.getIndex() }));
                        }
                        result.append("+ ");
                        if (i < 10) {
                            result.append("0");
                        }
                        result.append(Integer.toString(i)).append(" ");
                        if (word.getIndex() < 0x10) {
                            result.append("0");
                        }
                        result.append(Integer.toHexString(word.getIndex())).append(" ");
                    }
                }
            }
        }
    }
    result.append("| ").append(synset.getGloss()).append(" "); //why every line in most WN files ends with two spaces?
    return result.toString();
}
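The frame-counting loops above rely on the standard BitSet traversal idiom, nextSetBit(i + 1), which visits only the set bits. In isolation, with made-up frame numbers:

import java.util.BitSet;

public class NextSetBitLoop {
    public static void main(String[] args) {
        BitSet verbFrames = new BitSet();
        verbFrames.set(2);
        verbFrames.set(8);
        verbFrames.set(22);

        // Canonical forward iteration over the set bits only.
        for (int i = verbFrames.nextSetBit(0); i >= 0; i = verbFrames.nextSetBit(i + 1)) {
            System.out.println("frame " + i + " is set: " + verbFrames.get(i)); // always true inside the loop
        }
    }
}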
From source file:org.apache.fop.pdf.PDFFactory.java
private void buildCIDSet(PDFFontDescriptor descriptor, CIDFont cidFont) {
    BitSet cidSubset = cidFont.getCIDSubset().getGlyphIndexBitSet();
    PDFStream cidSet = makeStream(null, true);
    ByteArrayOutputStream baout = new ByteArrayOutputStream(cidSubset.length() / 8 + 1);
    int value = 0;
    for (int i = 0, c = cidSubset.length(); i < c; i++) {
        int shift = i % 8;
        boolean b = cidSubset.get(i);
        if (b) {
            value |= 1 << 7 - shift;
        }
        if (shift == 7) {
            baout.write(value);
            value = 0;
        }
    }
    baout.write(value);
    try {
        cidSet.setData(baout.toByteArray());
        descriptor.setCIDSet(cidSet);
    } catch (IOException ioe) {
        log.error("Failed to write CIDSet [" + cidFont + "] " + cidFont.getEmbedFontName(), ioe);
    }
}
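The CIDSet stream above packs the glyph bitmap eight bits per byte, most significant bit first, which is why the shift is 7 - (i % 8). A standalone sketch of that packing with hypothetical glyph indexes and none of the PDF plumbing:

import java.util.BitSet;

public class PackMsbFirst {
    public static void main(String[] args) {
        BitSet glyphs = new BitSet();
        glyphs.set(0);
        glyphs.set(3);
        glyphs.set(9);

        byte[] packed = new byte[glyphs.length() / 8 + 1];
        for (int i = 0; i < glyphs.length(); i++) {
            if (glyphs.get(i)) {
                packed[i / 8] |= 1 << (7 - (i % 8)); // MSB-first within each byte
            }
        }
        // bits 0 and 3 -> 1001 0000 = 0x90; bit 9 -> 0100 0000 = 0x40
        System.out.printf("%02X %02X%n", packed[0] & 0xFF, packed[1] & 0xFF); // prints "90 40"
        // Note: this MSB-first layout differs from BitSet.toByteArray(), which stores bits LSB-first per byte.
    }
}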
From source file:org.apache.tez.runtime.library.common.writers.TestUnorderedPartitionedKVWriter.java
private void baseTest(int numRecords, int numPartitions, Set<Integer> skippedPartitions, boolean shouldCompress)
        throws IOException, InterruptedException {
    PartitionerForTest partitioner = new PartitionerForTest();
    ApplicationId appId = ApplicationId.newInstance(10000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId);

    Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class,
            shouldCompress, -1);
    CompressionCodec codec = null;
    if (shouldCompress) {
        codec = new DefaultCodec();
        ((Configurable) codec).setConf(conf);
    }

    int numOutputs = numPartitions;
    long availableMemory = 2048;
    int numRecordsWritten = 0;

    Map<Integer, Multimap<Integer, Long>> expectedValues = new HashMap<Integer, Multimap<Integer, Long>>();
    for (int i = 0; i < numOutputs; i++) {
        expectedValues.put(i, LinkedListMultimap.<Integer, Long>create());
    }

    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf,
            numOutputs, availableMemory);

    int sizePerBuffer = kvWriter.sizePerBuffer;
    int sizePerRecord = 4 + 8; // IntW + LongW
    int sizePerRecordWithOverhead = sizePerRecord + 12; // Record + META_OVERHEAD

    IntWritable intWritable = new IntWritable();
    LongWritable longWritable = new LongWritable();
    for (int i = 0; i < numRecords; i++) {
        intWritable.set(i);
        longWritable.set(i);
        int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
        if (skippedPartitions != null && skippedPartitions.contains(partition)) {
            continue;
        }
        expectedValues.get(partition).put(intWritable.get(), longWritable.get());
        kvWriter.write(intWritable, longWritable);
        numRecordsWritten++;
    }
    List<Event> events = kvWriter.close();

    int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
    int numExpectedSpills = numRecordsWritten / recordsPerBuffer;

    verify(outputContext, never()).fatalError(any(Throwable.class), any(String.class));

    // Verify the status of the buffers
    if (numExpectedSpills == 0) {
        assertEquals(1, kvWriter.numInitializedBuffers);
    } else {
        assertTrue(kvWriter.numInitializedBuffers > 1);
    }
    assertNull(kvWriter.currentBuffer);
    assertEquals(0, kvWriter.availableBuffers.size());

    // Verify the counters
    TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
    TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
    TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
    TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
    TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
    TezCounter additionalSpillBytesWritternCounter = counters
            .findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
    TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
    TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
    assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
    assertEquals(numRecordsWritten, outputRecordsCounter.getValue());
    assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
    long fileOutputBytes = fileOutputBytesCounter.getValue();
    if (numRecordsWritten > 0) {
        assertTrue(fileOutputBytes > 0);
        if (!shouldCompress) {
            assertTrue(fileOutputBytes > outputRecordBytesCounter.getValue());
        }
    } else {
        assertEquals(0, fileOutputBytes);
    }
    assertEquals(recordsPerBuffer * numExpectedSpills, spilledRecordsCounter.getValue());
    long additionalSpillBytesWritten = additionalSpillBytesWritternCounter.getValue();
    long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
    if (numExpectedSpills == 0) {
        assertEquals(0, additionalSpillBytesWritten);
        assertEquals(0, additionalSpillBytesRead);
    } else {
        assertTrue(additionalSpillBytesWritten > 0);
        assertTrue(additionalSpillBytesRead > 0);
        if (!shouldCompress) {
            assertTrue(additionalSpillBytesWritten > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
            assertTrue(additionalSpillBytesRead > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
        }
    }
    assertTrue(additionalSpillBytesWritten == additionalSpillBytesRead);
    assertEquals(numExpectedSpills, numAdditionalSpillsCounter.getValue());

    BitSet emptyPartitionBits = null;
    // Verify the event returned
    assertEquals(1, events.size());
    assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(0);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numOutputs, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto
            .parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    assertFalse(eventProto.hasData());
    if (skippedPartitions == null && numRecordsWritten > 0) {
        assertFalse(eventProto.hasEmptyPartitions());
        emptyPartitionBits = new BitSet(numPartitions);
    } else {
        assertTrue(eventProto.hasEmptyPartitions());
        byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(eventProto.getEmptyPartitions());
        emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
        if (numRecordsWritten == 0) {
            assertEquals(numPartitions, emptyPartitionBits.cardinality());
        } else {
            for (Integer e : skippedPartitions) {
                assertTrue(emptyPartitionBits.get(e));
            }
            assertEquals(skippedPartitions.size(), emptyPartitionBits.cardinality());
        }
    }
    if (emptyPartitionBits.cardinality() != numPartitions) {
        assertEquals(HOST_STRING, eventProto.getHost());
        assertEquals(SHUFFLE_PORT, eventProto.getPort());
        assertEquals(uniqueId, eventProto.getPathComponent());
    } else {
        assertFalse(eventProto.hasHost());
        assertFalse(eventProto.hasPort());
        assertFalse(eventProto.hasPathComponent());
    }

    // Verify the actual data
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId);
    Path outputFilePath = null;
    Path spillFilePath = null;
    try {
        outputFilePath = taskOutput.getOutputFile();
    } catch (DiskErrorException e) {
        if (numRecordsWritten > 0) {
            fail();
        } else {
            // Record checking not required.
            return;
        }
    }
    try {
        spillFilePath = taskOutput.getOutputIndexFile();
    } catch (DiskErrorException e) {
        if (numRecordsWritten > 0) {
            fail();
        } else {
            // Record checking not required.
            return;
        }
    }

    // Special case for 0 records.
    TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    IntWritable keyDeser = new IntWritable();
    LongWritable valDeser = new LongWritable();
    for (int i = 0; i < numOutputs; i++) {
        if (skippedPartitions != null && skippedPartitions.contains(i)) {
            continue;
        }
        TezIndexRecord indexRecord = spillRecord.getIndex(i);
        FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
        inStream.seek(indexRecord.getStartOffset());
        IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false,
                0, -1);
        while (reader.nextRawKey(keyBuffer)) {
            reader.nextRawValue(valBuffer);
            keyDeser.readFields(keyBuffer);
            valDeser.readFields(valBuffer);
            int partition = partitioner.getPartition(keyDeser, valDeser, numOutputs);
            assertTrue(expectedValues.get(partition).remove(keyDeser.get(), valDeser.get()));
        }
        inStream.close();
    }
    for (int i = 0; i < numOutputs; i++) {
        assertEquals(0, expectedValues.get(i).size());
        expectedValues.remove(i);
    }
    assertEquals(0, expectedValues.size());
}
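The empty-partition payload checked in this test is essentially a BitSet serialized to bytes and compressed; Tez uses its own helpers (TezUtilsInternal, TezCommonUtils) for the conversion, so their exact bit layout is not reproduced here. The core round trip can be sketched with the plain java.util.BitSet byte-array methods (Java 7+) and hypothetical partition numbers:

import java.util.BitSet;

public class EmptyPartitionMask {
    public static void main(String[] args) {
        int numPartitions = 10;

        // Writer side: mark the partitions that received no records.
        BitSet empty = new BitSet(numPartitions);
        empty.set(3);
        empty.set(7);
        byte[] payload = empty.toByteArray(); // what would travel inside the event, before compression

        // Reader side: rebuild the bitmask and query it.
        BitSet decoded = BitSet.valueOf(payload);
        System.out.println(decoded.get(3));        // true  -> partition 3 is empty
        System.out.println(decoded.get(0));        // false -> partition 0 has data
        System.out.println(decoded.cardinality()); // 2 empty partitions in total
    }
}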
From source file:org.apache.jackrabbit.core.query.lucene.CachingIndexReader.java
/**
 * Returns the <code>DocId</code> of the parent of <code>n</code> or
 * {@link DocId#NULL} if <code>n</code> does not have a parent
 * (<code>n</code> is the root node).
 *
 * @param n       the document number.
 * @param deleted the documents that should be regarded as deleted.
 * @return the <code>DocId</code> of <code>n</code>'s parent.
 * @throws IOException if an error occurs while reading from the index.
 */
DocId getParent(int n, BitSet deleted) throws IOException {
    DocId parent;
    boolean existing = false;
    int parentDocNum = inSegmentParents[n];
    if (parentDocNum != -1) {
        parent = DocId.create(parentDocNum);
    } else {
        parent = foreignParentDocIds.get(n);
    }

    if (parent != null) {
        existing = true;
        // check if valid and reset if necessary
        if (!parent.isValid(deleted)) {
            if (log.isDebugEnabled()) {
                log.debug(parent + " not valid anymore.");
            }
            parent = null;
        }
    }

    if (parent == null) {
        int plainDocId = -1;
        Document doc = document(n, FieldSelectors.UUID_AND_PARENT);
        String[] parentUUIDs = doc.getValues(FieldNames.PARENT);
        if (parentUUIDs.length == 0 || parentUUIDs[0].length() == 0) {
            // root node
            parent = DocId.NULL;
        } else {
            if (shareableNodes.get(n)) {
                parent = DocId.create(parentUUIDs);
            } else {
                if (!existing) {
                    Term id = TermFactory.createUUIDTerm(parentUUIDs[0]);
                    TermDocs docs = termDocs(id);
                    try {
                        while (docs.next()) {
                            if (!deleted.get(docs.doc())) {
                                plainDocId = docs.doc();
                                parent = DocId.create(plainDocId);
                                break;
                            }
                        }
                    } finally {
                        docs.close();
                    }
                }
                // if still null, then parent is not in this index, or existing
                // DocId was invalid. thus, only allowed to create DocId from uuid
                if (parent == null) {
                    parent = DocId.create(parentUUIDs[0]);
                }
            }
        }

        // finally put to cache
        if (plainDocId != -1) {
            // PlainDocId
            inSegmentParents[n] = plainDocId;
        } else {
            // UUIDDocId
            foreignParentDocIds.put(n, parent);
            if (existing) {
                // there was an existing parent reference in
                // inSegmentParents, which was invalid and is replaced
                // with a UUIDDocId (points to a foreign segment).
                // mark as unknown
                inSegmentParents[n] = -1;
            }
        }
    }
    return parent;
}