List of usage examples for org.apache.hadoop.io Text getBytes
@Override
public byte[] getBytes()

Returns the raw bytes of the backing buffer; only data up to getLength() is valid.
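A point worth remembering across all of the examples below: getBytes() returns the Text's backing array, which can be longer than the encoded data, so only the first getLength() bytes are meaningful. A minimal, self-contained sketch of the pitfall (the class name is illustrative):

import java.nio.charset.StandardCharsets;
import java.util.Arrays;

import org.apache.hadoop.io.Text;

public class TextGetBytesDemo {
  public static void main(String[] args) {
    Text t = new Text("This is some text");
    t.set(new Text("a")); // shrinks the logical length, not the backing array
    byte[] raw = t.getBytes(); // still 17 bytes long, with stale data after index 0
    byte[] valid = Arrays.copyOf(raw, t.getLength()); // just the 1 valid byte
    System.out.println(raw.length + " raw vs " + t.getLength() + " valid");
    System.out.println(new String(valid, StandardCharsets.UTF_8)); // prints "a"
  }
}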
From source file: org.apache.accumulo.core.util.TextUtil.java
License: Apache License

public static ByteBuffer getByteBuffer(Text text) {
  if (text == null)
    return null;
  byte[] bytes = text.getBytes();
  return ByteBuffer.wrap(bytes, 0, text.getLength());
}
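A hypothetical caller, to show why the explicit length argument matters: the returned buffer's limit comes from getLength(), so stale bytes in an oversized backing array are never exposed.

ByteBuffer buf = TextUtil.getByteBuffer(new Text("row1"));
// buf.remaining() == 4, regardless of the backing array's capacity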
From source file: org.apache.accumulo.core.util.TextUtil.java
License: Apache License

public static Text truncate(Text text, int maxLen) {
  if (text.getLength() > maxLen) {
    Text newText = new Text();
    newText.append(text.getBytes(), 0, maxLen);
    String suffix = "... TRUNCATED";
    newText.append(suffix.getBytes(UTF_8), 0, suffix.length());
    return newText;
  }
  return text;
}
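The suffix here is pure ASCII, so suffix.length() and its UTF-8 byte length agree. A hypothetical call:

Text t = TextUtil.truncate(new Text("0123456789"), 4);
// t is now "0123... TRUNCATED"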
From source file: org.apache.accumulo.core.util.TextUtilTest.java
License: Apache License

public void testGetBytes() {
  String longMessage = "This is some text";
  Text longMessageText = new Text(longMessage);
  String smallerMessage = "a";
  Text smallerMessageText = new Text(smallerMessage);
  Text someText = new Text(longMessage);
  assertTrue(someText.equals(longMessageText));
  someText.set(smallerMessageText);
  assertTrue(someText.getLength() != someText.getBytes().length);
  assertTrue(TextUtil.getBytes(someText).length == smallerMessage.length());
  assertTrue((new Text(TextUtil.getBytes(someText))).equals(smallerMessageText));
}
From source file: org.apache.accumulo.examples.dirlist.QueryUtil.java
License: Apache License

/**
 * Returns either the {@link #DIR_COLF} or a decoded string version of the colf.
 *
 * @param colf
 *          the column family
 */
public static String getType(Text colf) {
  if (colf.equals(DIR_COLF))
    return colf.toString() + ":";
  return Long.toString(Ingest.encoder.decode(colf.getBytes())) + ":";
}
From source file: org.apache.accumulo.examples.filedata.KeyUtil.java
License: Apache License

/**
 * Split a text object using a null byte separator into an array of strings.
 *
 * @param t
 *          null-byte separated text object
 * @return an array of strings
 */
public static String[] splitNullSepText(Text t) {
  ArrayList<String> s = new ArrayList<>();
  byte[] b = t.getBytes();
  int lastindex = 0;
  for (int i = 0; i < t.getLength(); i++) {
    if (b[i] == (byte) 0) {
      s.add(new String(b, lastindex, i - lastindex));
      lastindex = i + 1;
    }
  }
  s.add(new String(b, lastindex, t.getLength() - lastindex));
  return s.toArray(new String[s.size()]);
}
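Note the loop is bounded by t.getLength() rather than b.length, which is what makes scanning the raw getBytes() array safe here. A hypothetical usage:

Text t = new Text("2011" + "\u0000" + "01" + "\u0000" + "page.html");
String[] parts = KeyUtil.splitNullSepText(t);
// parts: ["2011", "01", "page.html"]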
From source file: org.apache.accumulo.examples.simple.filedata.KeyUtil.java
License: Apache License

/**
 * Split a text object using a null byte separator into an array of strings.
 *
 * @param t
 *          null-byte separated text object
 * @return an array of strings
 */
public static String[] splitNullSepText(Text t) {
  ArrayList<String> s = new ArrayList<String>();
  byte[] b = t.getBytes();
  int lastindex = 0;
  for (int i = 0; i < t.getLength(); i++) {
    if (b[i] == (byte) 0) {
      s.add(new String(b, lastindex, i - lastindex));
      lastindex = i + 1;
    }
  }
  s.add(new String(b, lastindex, t.getLength() - lastindex));
  return s.toArray(new String[s.size()]);
}
From source file: org.apache.accumulo.examples.wikisearch.ingest.WikipediaMapper.java
License: Apache License

@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
  Article article = extractor.extract(new InputStreamReader(new ByteArrayInputStream(value.getBytes()), UTF8));
  String NULL_BYTE = "\u0000";
  String colfPrefix = language + NULL_BYTE;
  String indexPrefix = "fi" + NULL_BYTE;
  if (article != null) {
    int groupId = WikipediaMapper.getPartitionId(article, numGroups);
    if (groupId != myGroup) {
      return;
    }
    Text partitionId = new Text(Integer.toString(WikipediaMapper.getPartitionId(article, numPartitions)));

    // Create the mutations for the document.
    // Row is partition id, colf is language\0articleid, colq is fieldName\0fieldValue
    Mutation m = new Mutation(partitionId);
    for (Entry<String,Object> entry : article.getFieldValues().entrySet()) {
      m.put(colfPrefix + article.getId(), entry.getKey() + NULL_BYTE + entry.getValue().toString(), cv,
          article.getTimestamp(), NULL_VALUE);
      // Create mutations for the metadata table.
      String metadataKey = entry.getKey() + METADATA_EVENT_COLUMN_FAMILY + language;
      if (!metadataSent.contains(metadataKey)) {
        Mutation mm = new Mutation(entry.getKey());
        mm.put(METADATA_EVENT_COLUMN_FAMILY, language, cv, article.getTimestamp(), NULL_VALUE);
        context.write(metadataTableName, mm);
        metadataSent.add(metadataKey);
      }
    }

    // Tokenize the content
    Set<String> tokens = getTokens(article);

    // We are going to put the fields to be indexed into a multimap. This allows us to iterate
    // over the entire set once.
    Multimap<String,String> indexFields = HashMultimap.create();

    // Add the normalized field values
    LcNoDiacriticsNormalizer normalizer = new LcNoDiacriticsNormalizer();
    for (Entry<String,String> index : article.getNormalizedFieldValues().entrySet()) {
      indexFields.put(index.getKey(), index.getValue());
    }

    // Add the tokens
    for (String token : tokens) {
      indexFields.put(TOKENS_FIELD_NAME, normalizer.normalizeFieldValue("", token));
    }

    for (Entry<String,String> index : indexFields.entries()) {
      // Create mutations for the in-partition index
      // Row is partition id, colf is 'fi'\0fieldName, colq is fieldValue\0language\0article id
      m.put(indexPrefix + index.getKey(), index.getValue() + NULL_BYTE + colfPrefix + article.getId(), cv,
          article.getTimestamp(), NULL_VALUE);

      // Create a UID object for the Value
      Builder uidBuilder = Uid.List.newBuilder();
      uidBuilder.setIGNORE(false);
      uidBuilder.setCOUNT(1);
      uidBuilder.addUID(Integer.toString(article.getId()));
      Uid.List uidList = uidBuilder.build();
      Value val = new Value(uidList.toByteArray());

      // Create mutations for the global index
      // Row is field value, colf is field name, colq is partitionid\0language, value is Uid.List object
      Mutation gm = new Mutation(index.getValue());
      gm.put(index.getKey(), partitionId + NULL_BYTE + language, cv, article.getTimestamp(), val);
      context.write(indexTableName, gm);

      // Create mutations for the global reverse index
      Mutation grm = new Mutation(StringUtils.reverse(index.getValue()));
      grm.put(index.getKey(), partitionId + NULL_BYTE + language, cv, article.getTimestamp(), val);
      context.write(reverseIndexTableName, grm);

      // Create mutations for the metadata table.
      String metadataKey = index.getKey() + METADATA_INDEX_COLUMN_FAMILY + language;
      if (!metadataSent.contains(metadataKey)) {
        Mutation mm = new Mutation(index.getKey());
        mm.put(METADATA_INDEX_COLUMN_FAMILY, language + NULL_BYTE + LcNoDiacriticsNormalizer.class.getName(), cv,
            article.getTimestamp(), NULL_VALUE);
        context.write(metadataTableName, mm);
        metadataSent.add(metadataKey);
      }
    }

    // Add the entire text to the document section of the table.
    // Row is the partition, colf is 'd', colq is language\0articleid, value is Base64 encoded GZIP'd document
    m.put(DOCUMENT_COLUMN_FAMILY, colfPrefix + article.getId(), cv, article.getTimestamp(),
        new Value(Base64.encodeBase64(article.getText().getBytes())));
    context.write(tablename, m);
  } else {
    context.getCounter("wikipedia", "invalid articles").increment(1);
  }
  context.progress();
}
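One subtlety above: new ByteArrayInputStream(value.getBytes()) reads the entire backing array, which can contain stale bytes past value.getLength() when MapReduce reuses the Text instance. A defensive variant that bounds the stream to the valid region (a general-purpose sketch, not a change the Accumulo example itself makes):

InputStreamReader in = new InputStreamReader(
    new ByteArrayInputStream(value.getBytes(), 0, value.getLength()), UTF8);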
From source file: org.apache.accumulo.examples.wikisearch.ingest.WikipediaPartitioner.java
License: Apache License

@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
  Article article = extractor.extract(new InputStreamReader(new ByteArrayInputStream(value.getBytes()), UTF8));
  if (article != null) {
    int groupId = WikipediaMapper.getPartitionId(article, numGroups);
    if (groupId != myGroup)
      return;
    context.write(new Text(language), article);
  } else {
    context.getCounter("wikipedia", "invalid articles").increment(1);
    context.progress();
  }
}
From source file: org.apache.accumulo.examples.wikisearch.iterator.FieldIndexIterator.java
License: Apache License

public boolean jump(Key jumpKey) throws IOException {
  if (log.isDebugEnabled()) {
    String pEndRow = "empty";
    if (parentEndRow != null) {
      pEndRow = parentEndRow.toString();
    }
    log.debug("jump, current range: " + range + " parentEndRow is: " + pEndRow);
  }
  if (parentEndRow != null && jumpKey.getRow().compareTo(parentEndRow) > 0) {
    // can't go there.
    if (log.isDebugEnabled()) {
      log.debug("jumpRow: " + jumpKey.getRow() + " is greater than my parentEndRow: " + parentEndRow);
    }
    return false;
  }

  int comp;
  if (!this.hasTop()) {
    if (log.isDebugEnabled()) {
      log.debug("current row: " + this.currentRow);
    }
    /*
     * if I don't have a top, then I should be out of my range for my current row. Need to check parent range
     * to see if I'm supposed to continue to next row or not. Current row can be null because maybe I never
     * found anything in this row.
     */
    if (parentEndRow != null) {
      // if jumpKey row is greater than parentEndRow, stop
      if (jumpKey.getRow().compareTo(parentEndRow) > 0) {
        if (log.isDebugEnabled()) {
          log.debug("jumpKey row is greater than my parentEndRow, done");
        }
        return false;
      }
      // if my current row is null, I must have hit the end of the tablet
      if (currentRow == null) {
        if (log.isDebugEnabled()) {
          log.debug("I have parentEndRow, but no current row, must have hit end of tablet, done");
        }
        return false;
      }
      // if my current row is greater than jump row stop, a seek will be
      // called to get me going again. If my row is equal, but I don't
      // have a topKey, I'm done
      if (currentRow.compareTo(jumpKey.getRow()) >= 0) {
        if (log.isDebugEnabled()) {
          log.debug("I have parentEndRow, but topKey, and my currentRow is >= jumpRow, done");
        }
        return false;
      }
    } else {
      // we're allowed to go to the end of the tablet
      // if my current row is null, I must have hit the end of the tablet
      if (currentRow == null) {
        if (log.isDebugEnabled()) {
          log.debug("no parentEndRow and current Row is null, must have hit end of tablet, done");
        }
        return false;
      }
      if (currentRow.compareTo(jumpKey.getRow()) >= 0) {
        // I'm past or equal to the jump point and have no top,
        // jumping's not going to help
        if (log.isDebugEnabled()) {
          log.debug("no parentEndRow, no topKey, and currentRow is >= jumpRow, done");
        }
        return false;
      }
    }
    // ok, jumpKey is ahead of me, I'll mark it and allow the normal
    // flow to jump there and see if I have top.
    if (log.isDebugEnabled()) {
      log.debug("no topKey, but jumpRow is ahead and I'm allowed to go to it, marking");
    }
    comp = -1;
  } else {
    // I have a topKey, I can do the normal comparisons
    if (log.isDebugEnabled()) {
      log.debug("have top, can do normal comparisons");
    }
    comp = this.topKey.getRow().compareTo(jumpKey.getRow());
  }

  // ------------------
  // compare rows
  if (comp > 0) { // my row is ahead of jump key
    if (canBeInNextRow()) {
      if (log.isDebugEnabled()) {
        log.debug("I'm ahead of jump row & it's ok.");
        log.debug("jumpRow: " + jumpKey.getRow() + " myRow: " + topKey.getRow() + " parentEndRow: " + parentEndRow);
      }
      return true;
    } else {
      if (log.isDebugEnabled()) {
        log.debug("I'm ahead of jump row & can't be here, or at end of tablet.");
      }
      topKey = null;
      topValue = null;
      return false;
    }
  } else if (comp < 0) { // a row behind jump key, need to move forward
    if (log.isDebugEnabled()) {
      String myRow = "";
      if (hasTop()) {
        myRow = topKey.getRow().toString();
      } else if (currentRow != null) {
        myRow = currentRow.toString();
      }
      log.debug("My row " + myRow + " is less than jump row: " + jumpKey.getRow() + " seeking");
    }
    range = buildRange(jumpKey.getRow());
    // this.seek(range, EMPTY_COL_FAMS, false);
    boolean success = jumpSeek(range);
    if (log.isDebugEnabled() && success) {
      log.debug("uid forced jump, found topKey: " + topKey);
    }
    if (!this.hasTop()) {
      log.debug("seeked with new row and had no top");
      topKey = null;
      topValue = null;
      return false;
    } else if (parentEndRow != null && currentRow.compareTo(parentEndRow) > 0) {
      if (log.isDebugEnabled()) {
        log.debug("myRow: " + getTopKey().getRow() + " is past parentEndRow: " + parentEndRow);
      }
      topKey = null;
      topValue = null;
      return false;
    }
    if (log.isDebugEnabled()) {
      log.debug("jumped, valid top: " + getTopKey());
    }
    return true;
  } else { // rows are equal, check the uid!
    keyParser.parse(topKey);
    String myUid = keyParser.getUid();
    keyParser.parse(jumpKey);
    String jumpUid = keyParser.getUid();
    int ucomp = myUid.compareTo(jumpUid);
    if (log.isDebugEnabled()) {
      log.debug("topKeyUid: " + myUid + " jumpUid: " + jumpUid + " myUid.compareTo(jumpUid)->" + ucomp);
    }
    if (ucomp < 0) { // need to move up
      log.debug("my uid is less than jumpUid, topUid: " + myUid + " jumpUid: " + jumpUid);
      Text cq = jumpKey.getColumnQualifier();
      int index = cq.find(NULL_BYTE);
      if (0 <= index) {
        cq.set(cq.getBytes(), index + 1, cq.getLength() - index - 1);
      } else {
        log.error("Expected a NULL separator in the column qualifier");
        this.topKey = null;
        this.topValue = null;
        return false;
      }
      // note my internal range stays the same, I just need to move forward
      Key startKey = new Key(topKey.getRow(), fName, new Text(fValue + NULL_BYTE + cq));
      Key endKey = new Key(topKey.getRow(), fName, new Text(fValue + ONE_BYTE));
      range = new Range(startKey, true, endKey, false);
      log.debug("Using range: " + range + " to seek");
      // source.seek(range, EMPTY_COL_FAMS, false);
      boolean success = jumpSeek(range);
      if (log.isDebugEnabled() && success) {
        log.debug("uid forced jump, found topKey: " + topKey);
      }
      return success;
    } else {
      // else do nothing
      log.debug("my uid is greater than jumpUid, topKey: " + topKey + " jumpKey: " + jumpKey);
      log.debug("doing nothing");
    }
  }
  return hasTop();
}
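The cq.set(cq.getBytes(), index + 1, cq.getLength() - index - 1) call above is an in-place trim: it re-reads a sub-range of the Text's own backing array, dropping everything up to and including the first null byte. A standalone sketch of the same idiom (the values are illustrative):

Text cq = new Text("fieldValue" + "\u0000" + "uid123");
int index = cq.find("\u0000"); // byte offset of the separator
if (index >= 0) {
  cq.set(cq.getBytes(), index + 1, cq.getLength() - index - 1);
}
// cq now reads "uid123"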
From source file: org.apache.accumulo.pig.AccumuloWholeRowStorage.java
License: Apache License

private Tuple columnToTuple(Text colfam, Text colqual, Text colvis, long ts, Value val) throws IOException {
  Tuple tuple = TupleFactory.getInstance().newTuple(5);
  tuple.set(0, new DataByteArray(colfam.getBytes()));
  tuple.set(1, new DataByteArray(colqual.getBytes()));
  tuple.set(2, new DataByteArray(colvis.getBytes()));
  tuple.set(3, new Long(ts));
  tuple.set(4, new DataByteArray(val.get()));
  return tuple;
}
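Because getBytes() can return more bytes than are valid, wrapping it directly in a DataByteArray can carry stale trailing bytes along when the Text instances are reused. A defensive alternative that copies only the valid prefix (an illustrative variation, not the original code):

import java.util.Arrays;
// copy exactly getLength() bytes instead of handing over the whole backing array
tuple.set(0, new DataByteArray(Arrays.copyOf(colfam.getBytes(), colfam.getLength())));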