List of usage examples for org.apache.hadoop.io.Text getBytes()
@Override public byte[] getBytes()
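Most of the examples below pair getBytes() with getLength(): the array returned by Text.getBytes() is the object's backing buffer, which may be longer than the valid UTF-8 data when the Text has been reused. A minimal sketch of the pattern (class and variable names here are illustrative, not taken from the projects below):

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.Text;

public class GetBytesSketch {
    public static void main(String[] args) {
        Text text = new Text("a fairly long first value");
        text.set("short"); // reuse shrinks the valid length, not the backing buffer

        byte[] backing = text.getBytes(); // may contain stale bytes past getLength()
        int len = text.getLength();       // number of valid bytes

        // Decode or copy only the valid region
        String decoded = new String(backing, 0, len, StandardCharsets.UTF_8);
        System.out.println(decoded);      // prints "short"
    }
}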
From source file:com.ricemap.spateDB.io.TextSerializerHelper.java
License:Apache License
public static void consumeMap(Text text, Map<String, String> tags) {
    tags.clear();
    if (text.getLength() > 0) {
        byte[] tagsBytes = text.getBytes();
        if (tagsBytes[0] != Separators[MapStart])
            return;
        int i1 = 1;
        while (i1 < text.getLength() && tagsBytes[i1] != Separators[MapEnd]) {
            int i2 = i1 + 1;
            while (i2 < text.getLength() && tagsBytes[i2] != Separators[KeyValueSeparator])
                i2++;
            String key = new String(tagsBytes, i1, i2 - i1);
            i1 = i2 + 1;

            i2 = i1 + 1;
            while (i2 < text.getLength() && tagsBytes[i2] != Separators[FieldSeparator]
                    && tagsBytes[i2] != Separators[MapEnd])
                i2++;
            String value = new String(tagsBytes, i1, i2 - i1);
            tags.put(key, value);
            i1 = i2;
            if (i1 < text.getLength() && tagsBytes[i1] == Separators[FieldSeparator])
                i1++;
        }
        text.set(tagsBytes, i1, text.getLength() - i1);
    }
}
From source file:com.ricemap.spateDB.mapred.SpatialRecordReader.java
License:Apache License
/**
 * Reads the next line from the input and returns true if a line was read.
 * If no more lines are available in this split, false is returned.
 * @param value
 * @return
 * @throws IOException
 */
protected boolean nextLine(Text value) throws IOException {
    if (blockType == BlockType.RTREE && pos == 8) {
        // File is positioned at the RTree header
        // Skip the header and go to first data object in file
        pos += RTree.skipHeader(in);
        LOG.info("Skipped R-tree to position: " + pos);
        // Reinitialize record reader at the new position
        lineReader = new LineReader(in);
    }
    while (getFilePosition() <= end) {
        value.clear();
        int b = 0;
        if (buffer != null) {
            // Read the first line encountered in buffer
            int eol = RTree.skipToEOL(buffer, 0);
            b += eol;
            value.append(buffer, 0, eol);
            if (eol < buffer.length) {
                // There are still some bytes remaining in buffer; keep them for the next call
                byte[] tmp = new byte[buffer.length - eol];
                System.arraycopy(buffer, eol, tmp, 0, tmp.length);
                buffer = tmp;
            } else {
                buffer = null;
            }
            // Check if a complete line has been read from the buffer
            byte last_byte = value.getBytes()[value.getLength() - 1];
            if (last_byte == '\n' || last_byte == '\r')
                return true;
        }

        // Read the first line from stream
        Text temp = new Text();
        b += lineReader.readLine(temp);
        if (b == 0) {
            // Indicates an end of stream
            return false;
        }
        pos += b;

        // Append the part read from stream to the part extracted from buffer
        value.append(temp.getBytes(), 0, temp.getLength());

        if (value.getLength() > 1) {
            // Read a non-empty line. Note that the end-of-line character is included
            return true;
        }
    }
    // Reached end of file
    return false;
}
From source file:com.ricemap.spateDB.operations.Tail.java
License:Apache License
/**
 * Reads a maximum of n lines from the stream starting from its current
 * position and going backward.
 *
 * @param in - An input stream. It is scanned from its current position
 *   backward to position 0.
 * @param n - Maximum number of lines to return
 * @param stockObject - An object used to deserialize lines read. It can
 *   be set to <code>null</code> if output is also <code>null</code>. In this
 *   case, nothing is reported to the output.
 * @param output - An output collector used to report lines read.
 * @return - The position of the beginning of the earliest line read from
 *   the buffer.
 * @throws IOException
 */
public static <T extends TextSerializable> long tail(FSDataInputStream in, int n, T stockObject,
        ResultCollector<T> output) throws IOException {
    int lines_read = 0;
    long end = in.getPos();
    long offset_of_last_eol = end;
    long last_read_byte = end;

    LongWritable line_offset = new LongWritable();
    Text read_line = new Text();
    Text remainder_from_last_buffer = new Text();
    byte[] buffer = new byte[4096];

    while (last_read_byte > 0 && lines_read < n) {
        // Read next chunk from the back
        long first_byte_to_read = (last_read_byte - 1) - (last_read_byte - 1) % buffer.length;
        in.seek(first_byte_to_read);
        int bytes_to_read = (int) (last_read_byte - first_byte_to_read);
        in.read(buffer, 0, bytes_to_read);
        last_read_byte = first_byte_to_read;

        // Iterate over bytes in this buffer
        int i_last_byte_consumed_in_buffer = bytes_to_read;
        int i_last_byte_examined_in_buffer = bytes_to_read;
        while (i_last_byte_examined_in_buffer > 0 && lines_read < n) {
            byte byte_examined = buffer[--i_last_byte_examined_in_buffer];
            if (byte_examined == '\n' || byte_examined == '\r') {
                // Found an end-of-line character
                // Report this to output unless it's empty
                long offset_of_this_eol = first_byte_to_read + i_last_byte_examined_in_buffer;
                if (offset_of_last_eol - offset_of_this_eol > 1) {
                    if (output != null) {
                        read_line.clear();
                        // +1 is to skip the EOL at the beginning
                        read_line.append(buffer, i_last_byte_examined_in_buffer + 1,
                                i_last_byte_consumed_in_buffer - (i_last_byte_examined_in_buffer + 1));
                        // Also append bytes remaining from last buffer
                        if (remainder_from_last_buffer.getLength() > 0) {
                            read_line.append(remainder_from_last_buffer.getBytes(), 0,
                                    remainder_from_last_buffer.getLength());
                        }
                        line_offset.set(offset_of_this_eol + 1);
                        stockObject.fromText(read_line);
                        output.collect(stockObject);
                    }
                    lines_read++;
                    remainder_from_last_buffer.clear();
                }
                i_last_byte_consumed_in_buffer = i_last_byte_examined_in_buffer;
                offset_of_last_eol = offset_of_this_eol;
            }
        }
        if (i_last_byte_consumed_in_buffer > 0) {
            // There are still some bytes not consumed in buffer
            if (remainder_from_last_buffer.getLength() == 0) {
                // Store whatever is remaining in remainder_from_last_buffer
                remainder_from_last_buffer.append(buffer, 0, i_last_byte_consumed_in_buffer);
            } else {
                // Prepend remaining bytes to Text
                Text t = new Text();
                t.append(buffer, 0, i_last_byte_consumed_in_buffer);
                t.append(remainder_from_last_buffer.getBytes(), 0, remainder_from_last_buffer.getLength());
                remainder_from_last_buffer = t;
            }
        }
    }

    if (lines_read < n && remainder_from_last_buffer.getLength() > 0) {
        // There is still one last line that needs to be reported
        lines_read++;
        if (output != null) {
            read_line = remainder_from_last_buffer;
            line_offset.set(0);
            stockObject.fromText(read_line);
            output.collect(stockObject);
        }
        offset_of_last_eol = -1;
    }

    return offset_of_last_eol + 1;
}
From source file:com.rramos.bigdata.utils.GenericUDFSha2.java
License:Apache License
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
    if (digest == null) {
        return null;
    }

    digest.reset();
    if (isStr) {
        Text n = GenericUDFParamUtils.getTextValue(arguments, 0, converters);
        if (n == null) {
            return null;
        }
        digest.update(n.getBytes(), 0, n.getLength());
    } else {
        BytesWritable bWr = GenericUDFParamUtils.getBinaryValue(arguments, 0, converters);
        if (bWr == null) {
            return null;
        }
        digest.update(bWr.getBytes(), 0, bWr.getLength());
    }
    byte[] resBin = digest.digest();
    String resStr = Hex.encodeHexString(resBin);
    output.set(resStr);
    return output;
}
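For reference, a stand-alone sketch of the same getBytes()/getLength() hashing pattern outside Hive's UDF plumbing, assuming only MessageDigest and commons-codec (class and method names are illustrative):

import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import org.apache.commons.codec.binary.Hex;
import org.apache.hadoop.io.Text;

public class TextDigestSketch {
    // Hash only the valid bytes of the Text, not the whole backing array
    public static String sha256Hex(Text t) throws NoSuchAlgorithmException {
        MessageDigest digest = MessageDigest.getInstance("SHA-256");
        digest.update(t.getBytes(), 0, t.getLength());
        return Hex.encodeHexString(digest.digest());
    }
}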
From source file:com.spotify.hdfs2cass.CassandraPartitioner.java
License:Apache License
@Override
public int getPartition(Text key, Text value, int numReducers) {
    final int partition;

    final BigIntegerToken token = partitioner.getToken(ByteBuffer.wrap(key.getBytes()));

    final int index = Collections.binarySearch(tokenNodes, new TokenNode(token), SEARCH_COMPARATOR);
    if (index >= 0) {
        final int multiple = numReducers / tokenNodes.size();
        partition = index + (multiple * RANDOM.nextInt(multiple));
    } else {
        throw new RuntimeException("Failed to find a node for token " + token);
    }

    return partition;
}
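Unlike most of the other examples, this one wraps the whole backing array. If the key Text were reused, a hedged alternative (illustrative names, same idea) would bound the wrap to the valid bytes:

import java.nio.ByteBuffer;
import org.apache.hadoop.io.Text;

public class WrapValidBytesSketch {
    public static ByteBuffer wrapValid(Text key) {
        // Wrap only the valid region of the backing array
        return ByteBuffer.wrap(key.getBytes(), 0, key.getLength());
    }
}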
From source file:com.transwarp.hbase.bulkload.withindex.TextWithIndexSortReducer.java
License:Apache License
@Override
protected void reduce(ImmutableBytesWritable rowKey, java.lang.Iterable<Text> lines,
        Reducer<ImmutableBytesWritable, Text, ImmutableBytesWritable, KeyValue>.Context context)
        throws java.io.IOException, InterruptedException {
    // although reduce() is called per-row, handle pathological case
    long threshold = context.getConfiguration().getLong("reducer.row.threshold", 1L * (1 << 30));
    Iterator<Text> iter = lines.iterator();
    boolean qualifier = context.getConfiguration().getBoolean("indexqualifier", false);
    while (iter.hasNext()) {
        // Get the prefix to judge whether this is the primary table (prefix == 0)
        // or an index table (prefix > 0)
        int rowkeyPrefix = Bytes.toInt(rowKey.get(), 0, 4);
        byte[] rowKeyWithoutPrefix = Bytes.tail(rowKey.get(), rowKey.get().length - 4);
        Set<KeyValue> map = new TreeSet<KeyValue>(KeyValue.COMPARATOR);
        long curSize = 0;
        // stop at the end or the RAM threshold
        while (iter.hasNext() && curSize < threshold) {
            Text line = iter.next();
            String lineStr = line.toString();
            try {
                Put p = null;
                if (rowkeyPrefix == 0) {
                    ArrayList<String> parsedLine = ParsedLine.parse(converter.getRecordSpec(), lineStr);
                    p = converter.convert(parsedLine, rowKeyWithoutPrefix);
                } else {
                    p = new Put(rowKeyWithoutPrefix);
                    if (qualifier) {
                        p.add(family, line.getBytes(), emptyByte);
                    } else {
                        p.add(family, this.qualifier, line.getBytes());
                    }
                }

                if (p != null) {
                    for (List<KeyValue> kvs : p.getFamilyMap().values()) {
                        for (KeyValue kv : kvs) {
                            map.add(kv);
                            curSize += kv.getLength();
                        }
                    }
                }
            } catch (FormatException badLine) {
                if (skipBadLines) {
                    System.err.println("Bad line." + badLine.getMessage());
                    incrementBadLineCount(1);
                    return;
                }
                throw new IOException(badLine);
            } catch (IllegalArgumentException e) {
                if (skipBadLines) {
                    System.err.println("Bad line." + e.getMessage());
                    incrementBadLineCount(1);
                    return;
                }
                throw new IOException(e);
            }
        }
        context.setStatus("Read " + map.size() + " entries of " + map.getClass() + "("
                + StringUtils.humanReadableInt(curSize) + ")");
        int index = 0;
        for (KeyValue kv : map) {
            context.write(rowKey, kv);
            if (++index > 0 && index % 100 == 0)
                context.setStatus("Wrote " + index + " key values.");
        }

        // if we have more entries to process
        if (iter.hasNext()) {
            // force flush because we cannot guarantee intra-row sorted order
            context.write(null, null);
        }
    }
}
From source file:com.uber.hoodie.common.table.timeline.HoodieArchivedTimeline.java
License:Apache License
public HoodieArchivedTimeline(HoodieTableMetaClient metaClient) {
    // Read back the commits to make sure
    Path archiveLogPath = HoodieArchivedTimeline.getArchiveLogPath(metaClient.getArchivePath());
    try (SequenceFile.Reader reader = new SequenceFile.Reader(metaClient.getHadoopConf(),
            SequenceFile.Reader.file(archiveLogPath))) {
        Text key = new Text();
        Text val = new Text();
        while (reader.next(key, val)) {
            // TODO - limit the number of commits loaded in memory. this could get very large.
            // This is okay because only tooling will load the archived commit timeline today
            readCommits.put(key.toString(), Arrays.copyOf(val.getBytes(), val.getLength()));
        }
        this.setInstants(readCommits.keySet().stream()
                .map(s -> new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, s))
                .collect(Collectors.toList()));
    } catch (IOException e) {
        throw new HoodieIOException("Could not load archived commit timeline from path " + archiveLogPath, e);
    }
    // multiple casts will make this lambda serializable -
    // http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16
    this.details = (Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails;
    this.metaClient = metaClient;
}
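The Arrays.copyOf(val.getBytes(), val.getLength()) call is what makes it safe to reuse the same Text instances across reader.next() calls: each stored array is an independent copy trimmed to the valid length. A minimal, self-contained sketch of the same read loop for an arbitrary SequenceFile of Text pairs (class name and path handling are illustrative):

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class ReadTextSequenceFileSketch {
    public static Map<String, byte[]> readAll(Configuration conf, Path path) throws Exception {
        Map<String, byte[]> entries = new HashMap<>();
        try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path))) {
            Text key = new Text();
            Text val = new Text();
            while (reader.next(key, val)) {
                // key and val are reused on the next iteration, so copy only the valid bytes
                entries.put(key.toString(), Arrays.copyOf(val.getBytes(), val.getLength()));
            }
        }
        return entries;
    }
}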
From source file:com.yahoo.glimmer.indexing.generator.DocumentMapper.java
License:Open Source License
@Override
public void map(LongWritable key, Text record, Context context) throws IOException, InterruptedException {
    doc.setContent(record.getBytes(), record.getLength());

    if (doc == null || doc.getSubject() == null) {
        // Failed parsing
        context.getCounter(Counters.FAILED_PARSING).increment(1);
        LOG.error("Document failed parsing");
        return;
    }

    if (doc.getId() < 0) {
        throw new IllegalStateException("Negative docId:" + doc.getId() + " subject:" + doc.getSubject());
    }

    // This is used to write the position of the last occurrence and to test
    // if the fakeDocOccurrrence for the term has already been written.
    Map<String, DocStat> termToDocStatMap = new HashMap<String, DocStat>();

    // Iterate over all indices
    for (int indexId = 0; indexId < fields.length; indexId++) {
        String fieldName = fields[indexId];
        if (fieldName.startsWith("NOINDEX")) {
            continue;
        }

        TermValue indexIdValue = new TermValue(Type.INDEX_ID, indexId);

        // Iterate in parallel over the words of the indices
        MutableString term = new MutableString("");
        MutableString nonWord = new MutableString("");
        WordReader termReader = doc.content(indexId);
        int position = 0;

        while (termReader.next(term, nonWord)) {
            // Read next property as well
            if (term != null && term.length() > 0) {
                String termString = term.toString();

                // Report progress
                context.setStatus(fields[indexId] + "=" + term.substring(0, Math.min(term.length(), 50)));

                // Create an occurrence at the next position
                TermValue occurrenceValue = new TermValue(Type.OCCURRENCE, doc.getId(), position);
                context.write(new TermKey(termString, indexId, occurrenceValue), occurrenceValue);

                DocStat docStat = termToDocStatMap.get(termString);
                if (docStat == null) {
                    if (doc.getIndexType() == RDFDocumentFactory.IndexType.VERTICAL) {
                        // For the Alignment Index, we write the predicate id (which is equal
                        // to the index id for a VERTICAL index) the first time we encounter a term.
                        // The 'Alignment Index' is an index without counts or positions. It's used
                        // for query optimization in the query parser. The resulting 'alignment index'
                        // is basically used as a map from term to the predicates that the term occurs in.
                        context.write(new TermKey(termString, ALIGNMENT_INDEX, indexIdValue), indexIdValue);
                    }
                    docStat = new DocStat();
                    docStat.last = position;
                    docStat.count = 1;
                    termToDocStatMap.put(termString, docStat);
                } else {
                    docStat.last = position;
                    docStat.count++;
                }

                position++;
                context.getCounter(Counters.INDEXED_OCCURRENCES).increment(1);
            } else {
                LOG.info("Next term is null");
            }
        }

        if (doc.getIndexType() == RDFDocumentFactory.IndexType.HORIZONTAL && position > 0) {
            TermValue docSizeValue = new TermValue(Type.DOC_SIZE, doc.getId(), position);
            context.write(new TermKey(TermKey.DOC_SIZE_TERM, indexId, docSizeValue), docSizeValue);
        }

        for (String termString : termToDocStatMap.keySet()) {
            DocStat docStat = termToDocStatMap.get(termString);
            TermValue occurrenceCountValue = new TermValue(Type.TERM_STATS, docStat.count, docStat.last);
            context.write(new TermKey(termString, indexId, occurrenceCountValue), occurrenceCountValue);
        }
        termToDocStatMap.clear();
    }
    context.getCounter(Counters.NUMBER_OF_RECORDS).increment(1);
}
From source file:core.data.ConditionalMutation.java
License:Apache License
public ConditionalMutation(Text row) {
    this(row.getBytes(), 0, row.getLength());
}
From source file:cosmos.impl.IndexToMultimapRecord.java
License:Apache License
@Override
public MultimapRecord apply(Entry<Key, Value> input) {
    Key k = input.getKey();

    Text colqual = k.getColumnQualifier();

    int index = colqual.find(Defaults.NULL_BYTE_STR);
    if (-1 == index) {
        throw new RuntimeException("Was provided unexpected Key: " + k);
    }

    int start = index + 1;
    try {
        String docId = Text.decode(colqual.getBytes(), start, colqual.getLength() - start);

        return sorts.contents(id, docId);
    } catch (TableNotFoundException e) {
        throw new RuntimeException(e);
    } catch (UnexpectedStateException e) {
        throw new RuntimeException(e);
    } catch (CharacterCodingException e) {
        throw new RuntimeException(e);
    }
}
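A small stand-alone sketch of the Text.decode(byte[], int, int) idiom used above, splitting a column qualifier on a null-byte separator (the separator constant mirrors Defaults.NULL_BYTE_STR and is an assumption here; class and method names are illustrative):

import java.nio.charset.CharacterCodingException;
import org.apache.hadoop.io.Text;

public class DecodeAfterSeparatorSketch {
    private static final String NULL_BYTE_STR = "\u0000"; // assumed to match Defaults.NULL_BYTE_STR

    public static String docIdAfterSeparator(Text colqual) throws CharacterCodingException {
        int index = colqual.find(NULL_BYTE_STR);
        if (index == -1) {
            throw new IllegalArgumentException("No separator in: " + colqual);
        }
        int start = index + 1;
        // Decode only the bytes after the separator, bounded by getLength()
        return Text.decode(colqual.getBytes(), start, colqual.getLength() - start);
    }
}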