List of usage examples for the org.apache.hadoop.io.Text#getBytes() method
@Override public byte[] getBytes()
From source file:RunText.java
License:Apache License
/**
 * Splits a delimited record held in a {@link Text} into its fields.
 *
 * <p>Field boundaries are located with {@code find}, starting from the byte
 * after the previous delimiter. Only fields that are terminated by a
 * delimiter are returned: once no further delimiter is found the loop stops,
 * so any bytes after the last delimiter are not emitted.
 *
 * @param value the record to split; only the first {@code value.getLength()}
 *              bytes of its backing array are examined
 * @return the delimiter-terminated fields, in order of appearance
 */
private static List<String> parse(Text value) {
    List<String> strings = Lists.newArrayList();
    int p = 0;
    while (p < value.getLength()) {
        int next = find(value, delim, p);
        if (next == -1) {
            break;
        }
        // Text stores UTF-8 encoded bytes, so decode explicitly as UTF-8
        // instead of relying on the platform default charset.
        String s = new String(value.getBytes(), p, next - p,
                java.nio.charset.StandardCharsets.UTF_8);
        strings.add(s);
        p = next + 1;
    }
    return strings;
}
From source file:RunText.java
License:Apache License
/**
 * Finds the first occurrence of a byte that lies outside double-quoted
 * sections of a {@link Text}.
 *
 * <p>Scanning begins at {@code start} and stops at {@code text.getLength()}.
 * Every {@code '"'} byte toggles the "inside quotes" state, and a match is
 * only reported while that state is off. The state is assumed to be off at
 * {@code start}.
 *
 * @param text  the text whose bytes are scanned
 * @param what  the byte to look for
 * @param start index of the first byte to examine
 * @return the index of the first unquoted match, or {@code -1} if none
 */
private static int find(Text text, byte what, int start) {
    final byte[] data = text.getBytes();
    final int limit = text.getLength();
    boolean quoted = false;
    for (int i = start; i < limit; i++) {
        final byte current = data[i];
        if (current == '\"') {
            quoted = !quoted;
        }
        if (!quoted && current == what) {
            return i;
        }
    }
    return -1;
}
From source file:Importer.java
License:Open Source License
public static Text hash(Text content) throws Exception { StringBuilder sb = new StringBuilder(); sb.append("post_"); MessageDigest md = MessageDigest.getInstance("MD5"); md.update(content.getBytes(), 0, content.getLength()); byte[] bytes = md.digest(); for (int i = 0; i < bytes.length; ++i) { if ((bytes[i] & 0xF0) == 0) sb.append('0'); sb.append(Integer.toHexString(0xFF & bytes[i])); }//from w w w .j a va 2 s.c om return new Text(sb.toString()); }
From source file:TweetTweetTweet.java
License:Open Source License
@Override public void fromText(Text text) { tweet1.fromText(text);/*from w ww.j av a2 s . c om*/ // Skip the Tab text.set(text.getBytes(), 1, text.getLength() - 1); tweet2.fromText(text); // Skip the Tab text.set(text.getBytes(), 1, text.getLength() - 1); tweet3.fromText(text); }
From source file:TestString.java
License:Apache License
/**
 * Times repeated "substring" extraction from a {@link Text} by copying byte
 * ranges of its backing array into two reusable {@link Text} instances, then
 * prints both results and the elapsed time in seconds.
 */
@Test
public void testTextSubstring() throws Exception {
    final Text source = new Text("string");
    final Text head = new Text();
    final Text tail = new Text();

    final long begin = System.nanoTime();
    for (int round = 0; round < 100000000; round++) {
        // First two bytes, then everything from byte index 3 onward.
        head.set(source.getBytes(), 0, 2);
        tail.set(source.getBytes(), 3, source.getLength() - 3);
    }
    final long finish = System.nanoTime();

    System.out.println("TestTextSubString");
    System.out.println("text1: " + head.toString());
    System.out.println("text2: " + tail.toString());
    System.out.println("Elapsed Time: " + (finish - begin) / 1000000000f + " seconds.");
}
From source file:accumulo.ingest.AbstractAccumuloCsvIngest.java
License:Apache License
/**
 * Overwrites {@code buffer} with a row id made of the file name bytes
 * followed by the encoded record count.
 *
 * @param buffer      the text to fill; its previous content is cleared
 * @param fileName    the file name whose bytes form the row id prefix
 * @param recordCount the record number, encoded with {@code lex} to form the suffix
 */
protected void setRowId(Text buffer, Text fileName, long recordCount) {
    final byte[] encodedCount = lex.encode(recordCount);

    buffer.clear();

    // Read the file name only after clearing, exactly as the append call did before.
    final byte[] nameBytes = fileName.getBytes();
    final int nameLength = fileName.getLength();
    buffer.append(nameBytes, 0, nameLength);
    buffer.append(encodedCount, 0, encodedCount.length);
}
From source file:brush.FastqRecordReader.java
License:Apache License
/** * Position the input stream at the start of the first record. * * @param stream The stream to reposition. *///w ww . java2 s .c o m protected void positionAtFirstRecord(FSDataInputStream stream) throws IOException { Text buffer = new Text(); if (true) { // (start > 0) // use start>0 to assume that files start with valid data // Advance to the start of the first record that ends with /1 // We use a temporary LineReader to read lines until we find the // position of the right one. We then seek the file to that position. stream.seek(start); LineReader reader = new LineReader(stream); int bytesRead = 0; do { bytesRead = reader.readLine(buffer, (int) Math.min(MAX_LINE_LENGTH, end - start)); int bufferLength = buffer.getLength(); if (bytesRead > 0 && !checkBuffer(bufferLength, buffer)) { start += bytesRead; } else { // line starts with @. Read two more and verify that it starts with a + // // If this isn't the start of a record, we want to backtrack to its end long backtrackPosition = start + bytesRead; bytesRead = reader.readLine(buffer, (int) Math.min(MAX_LINE_LENGTH, end - start)); bytesRead = reader.readLine(buffer, (int) Math.min(MAX_LINE_LENGTH, end - start)); if (bytesRead > 0 && buffer.getLength() > 0 && buffer.getBytes()[0] == '+') { break; // all good! } else { // backtrack to the end of the record we thought was the start. start = backtrackPosition; stream.seek(start); reader = new LineReader(stream); } } } while (bytesRead > 0); stream.seek(start); } pos = start; }
From source file:brush.FastqRecordReader.java
License:Apache License
/** * Parses a read from an interleaved FASTQ file. * * Only reads a single record.//from w w w . j av a 2 s. c o m * * @param readName Text record containing read name. Output parameter. * @param value Text record containing full record. Output parameter. * @return Returns true if read was successful (did not hit EOF). * * @throws RuntimeException Throws exception if FASTQ record doesn't * have proper formatting (e.g., record doesn't start with @). */ protected boolean lowLevelFastqRead(Text readName, Text value) throws IOException { // ID line readName.clear(); long skipped = appendLineInto(readName, true); pos += skipped; if (skipped == 0) { return false; // EOF } if (readName.getBytes()[0] != '@') { throw new RuntimeException("unexpected fastq record didn't start with '@' at " + makePositionMessage() + ". Line: " + readName + ". \n"); } value.append(readName.getBytes(), 0, readName.getLength()); // sequence appendLineInto(value, false); // separator line appendLineInto(value, false); // quality appendLineInto(value, false); return true; }
From source file:brush.FastqRecordReader.java
License:Apache License
/**
 * Reads one line from the underlying line reader and appends it, followed by
 * a newline byte, to a text record. Also advances {@code pos} by the number
 * of bytes read.
 *
 * @param dest Text record to append the line to.
 * @param eofOk Whether an EOF is acceptable in this line.
 * @return Returns the number of bytes read.
 *
 * @throws EOFException Throws if eofOk was false and we hit an EOF in
 *   the current line.
 */
private int appendLineInto(final Text dest, final boolean eofOk) throws EOFException, IOException {
    final Text line = new Text();
    final int consumed = lineReader.readLine(line, MAX_LINE_LENGTH);

    final boolean unexpectedEof = consumed == 0 && !eofOk;
    if (consumed < 0 || unexpectedEof) {
        throw new EOFException();
    }

    dest.append(line.getBytes(), 0, line.getLength());
    dest.append(newline, 0, 1);
    pos += consumed;
    return consumed;
}
From source file:cascading.scheme.hadoop.TextLine.java
License:Open Source License
/**
 * Decodes the {@link Text} stored in the context into a {@link String}.
 *
 * @param context the context array; element 1 is cast to {@link Text} and
 *                element 2 to the {@link Charset} used for decoding
 * @return the first {@code getLength()} bytes of the text, decoded with that charset
 */
protected String makeEncodedString(Object[] context) {
    final Text text = (Text) context[1];
    final byte[] raw = text.getBytes();
    final int length = text.getLength();
    final Charset charset = (Charset) context[2];
    return new String(raw, 0, length, charset);
}