List of usage examples for the find method of org.apache.hadoop.io.Text
public int find(String what)
From source file:fi.tkk.ics.hadoop.bam.cli.plugins.chipster.SummarySort.java
License:Open Source License
@Override public boolean nextKeyValue() throws IOException, CharacterCodingException { if (!lineRR.nextKeyValue()) return false; Text line = getCurrentValue(); int tabOne = line.find("\t"); int rid = Integer.parseInt(Text.decode(line.getBytes(), 0, tabOne)); int tabTwo = line.find("\t", tabOne + 1); int posBeg = tabOne + 1; int posEnd = tabTwo - 1; int pos = Integer.parseInt(Text.decode(line.getBytes(), posBeg, posEnd - posBeg + 1)); key.set(BAMRecordReader.getKey0(rid, pos)); return true;//from w ww.j a va2 s . c o m }
From source file:io.aos.hdfs.StringTextComparisonTest.java
License:Apache License
/**
 * Checks that {@code Text} measures length, {@code find} results and
 * {@code charAt} positions in encoded bytes, using a five-character string
 * whose characters encode to 1, 2, 3 and 4 bytes respectively.
 */
@Test
public void text() {
    Text t = new Text("\u0041\u00DF\u6771\uD801\uDC00");

    // 1 + 2 + 3 + 4 encoded bytes
    int encodedLength = t.getLength();
    assertThat(encodedLength, is(10));

    // find() reports the byte offset at which each character starts
    int offsetOneByte = t.find("\u0041");
    assertThat(offsetOneByte, is(0));
    int offsetTwoByte = t.find("\u00DF");
    assertThat(offsetTwoByte, is(1));
    int offsetThreeByte = t.find("\u6771");
    assertThat(offsetThreeByte, is(3));
    int offsetFourByte = t.find("\uD801\uDC00");
    assertThat(offsetFourByte, is(6));

    // charAt() takes a byte offset and yields the full code point
    int codePointAt0 = t.charAt(0);
    assertThat(codePointAt0, is(0x0041));
    int codePointAt1 = t.charAt(1);
    assertThat(codePointAt1, is(0x00DF));
    int codePointAt3 = t.charAt(3);
    assertThat(codePointAt3, is(0x6771));
    int codePointAt6 = t.charAt(6);
    assertThat(codePointAt6, is(0x10400));
}
From source file:io.aos.hdfs.TextTest.java
License:Apache License
@Test public void find() throws IOException { // vv TextTest-Find Text t = new Text("hadoop"); assertThat("Find a substring", t.find("do"), is(2)); assertThat("Finds first 'o'", t.find("o"), is(3)); assertThat("Finds 'o' from position 4 or later", t.find("o", 4), is(4)); assertThat("No match", t.find("pig"), is(-1)); // ^^ TextTest-Find }
From source file:io.aos.hdfs.TextTest.java
License:Apache License
/**
 * Contrasts {@code String} indexing (by UTF-16 code unit) with {@code Text}
 * indexing (by encoded byte offset) on a string that contains a
 * supplementary character stored as a surrogate pair.
 */
@Test
public void withSupplementaryCharacters() throws IOException {
    final String s = "\u0041\u00DF\u6771\uD801\uDC00";

    // String side: five UTF-16 code units, ten bytes once encoded
    assertThat(s.length(), is(5));
    final byte[] encoded = s.getBytes("UTF-8");
    assertThat(encoded.length, is(10));

    // each code unit, including both surrogate halves, has its own index
    assertThat(s.indexOf('\u0041'), is(0));
    assertThat(s.indexOf('\u00DF'), is(1));
    assertThat(s.indexOf('\u6771'), is(2));
    assertThat(s.indexOf('\uD801'), is(3));
    assertThat(s.indexOf('\uDC00'), is(4));

    assertThat(s.charAt(0), is('\u0041'));
    assertThat(s.charAt(1), is('\u00DF'));
    assertThat(s.charAt(2), is('\u6771'));
    assertThat(s.charAt(3), is('\uD801'));
    assertThat(s.charAt(4), is('\uDC00'));

    // Text side: same content, addressed by byte offset
    final Text t = new Text(s);
    assertThat(serializeToString(t), is("0a41c39fe69db1f0909080"));

    int at = t.find("\u0041");
    assertThat(t.charAt(at), is(0x0041));
    at = t.find("\u00DF");
    assertThat(t.charAt(at), is(0x00DF));
    at = t.find("\u6771");
    assertThat(t.charAt(at), is(0x6771));
    at = t.find("\uD801\uDC00");
    assertThat(t.charAt(at), is(0x10400));
}
From source file:it.uniroma1.hadoop.pagerank.job1.PageRankJob1Mapper.java
License:Open Source License
@Override public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { /* Job#1 mapper will simply parse a line of the input graph creating a map with key-value(s) pairs. * Input format is the following (separator is TAB): * /*from w ww . j a v a 2 s. c om*/ * <nodeA> <nodeB> * * which denotes an edge going from <nodeA> to <nodeB>. * We would need to skip comment lines (denoted by the # characters at the beginning of the line). * We will also collect all the distinct nodes in our graph: this is needed to compute the initial * pagerank value in Job #1 reducer and also in later jobs. */ if (value.charAt(0) != '#') { int tabIndex = value.find("\t"); String nodeA = Text.decode(value.getBytes(), 0, tabIndex); String nodeB = Text.decode(value.getBytes(), tabIndex + 1, value.getLength() - (tabIndex + 1)); context.write(new Text(nodeA), new Text(nodeB)); // add the current source node to the node list so we can // compute the total amount of nodes of our graph in Job#2 PageRank.NODES.add(nodeA); // also add the target node to the same list: we may have a target node // with no outlinks (so it will never be parsed as source) PageRank.NODES.add(nodeB); } }
From source file:it.uniroma1.hadoop.pagerank.job2.PageRankJob2Mapper.java
License:Open Source License
@Override public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { /* PageRank calculation algorithm (mapper) * Input file format (separator is TAB): * /*from ww w. j a va 2 s .c o m*/ * <title> <page-rank> <link1>,<link2>,<link3>,<link4>,... ,<linkN> * * Output has 2 kind of records: * One record composed by the collection of links of each page: * * <title> |<link1>,<link2>,<link3>,<link4>, ... , <linkN> * * Another record composed by the linked page, the page rank of the source page * and the total amount of out links of the source page: * * <link> <page-rank> <total-links> */ int tIdx1 = value.find("\t"); int tIdx2 = value.find("\t", tIdx1 + 1); // extract tokens from the current line String page = Text.decode(value.getBytes(), 0, tIdx1); String pageRank = Text.decode(value.getBytes(), tIdx1 + 1, tIdx2 - (tIdx1 + 1)); String links = Text.decode(value.getBytes(), tIdx2 + 1, value.getLength() - (tIdx2 + 1)); String[] allOtherPages = links.split(","); for (String otherPage : allOtherPages) { Text pageRankWithTotalLinks = new Text(pageRank + "\t" + allOtherPages.length); context.write(new Text(otherPage), pageRankWithTotalLinks); } // put the original links so the reducer is able to produce the correct output context.write(new Text(page), new Text(PageRank.LINKS_SEPARATOR + links)); }
From source file:it.uniroma1.hadoop.pagerank.job3.PageRankJob3Mapper.java
License:Open Source License
@Override public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { /* Rank Ordering (mapper only) * Input file format (separator is TAB): * /*from w w w .j a v a2 s . c o m*/ * <title> <page-rank> <link1>,<link2>,<link3>,<link4>,... ,<linkN> * * This is a simple job which does the ordering of our documents according to the computed pagerank. * We will map the pagerank (key) to its value (page) and Hadoop will do the sorting on keys for us. * There is no need to implement a reducer: the mapping and sorting is enough for our purpose. */ int tIdx1 = value.find("\t"); int tIdx2 = value.find("\t", tIdx1 + 1); // extract tokens from the current line String page = Text.decode(value.getBytes(), 0, tIdx1); float pageRank = Float.parseFloat(Text.decode(value.getBytes(), tIdx1 + 1, tIdx2 - (tIdx1 + 1))); context.write(new DoubleWritable(pageRank), new Text(page)); }
From source file:org.apache.accumulo.core.iterators.user.IndexedDocIterator.java
License:Apache License
/**
 * Extracts the document id from the column qualifier of {@code key}: the
 * bytes strictly between the first and the third NUL byte of the qualifier
 * (the second NUL stays inside the returned value).
 *
 * @param key the key whose column qualifier is parsed
 * @return a new {@code Text} holding the extracted bytes
 * @throws IllegalArgumentException if the qualifier contains fewer than
 *         three NUL bytes, or the computed indices are out of bounds
 */
public static Text parseDocID(Key key) {
    final Text qualifier = key.getColumnQualifier();

    // Locate the first three NUL separators, each search starting right
    // after the previous hit.
    final int[] nulAt = new int[3];
    int searchFrom = 0;
    for (int i = 0; i < nulAt.length; i++) {
        nulAt[i] = qualifier.find("\0", searchFrom);
        if (nulAt[i] < 0) {
            throw new IllegalArgumentException("bad docid: " + key.toString());
        }
        searchFrom = nulAt[i] + 1;
    }

    final int start = nulAt[0] + 1;
    final int length = nulAt[2] - 1 - nulAt[0];
    final Text docID = new Text();
    try {
        docID.set(qualifier.getBytes(), start, length);
    } catch (ArrayIndexOutOfBoundsException e) {
        throw new IllegalArgumentException("bad indices for docid: " + key.toString() + " " + nulAt[0] + " "
                + nulAt[1] + " " + nulAt[2]);
    }
    return docID;
}
From source file:org.apache.accumulo.core.iterators.user.IndexedDocIterator.java
License:Apache License
@Override protected Text getTerm(Key key) { if (indexColf.compareTo(key.getColumnFamily().getBytes(), 0, indexColf.getLength()) < 0) { // We're past the index column family, so return a term that will sort lexicographically last. // The last unicode character should suffice return new Text("\uFFFD"); }/*from ww w .j ava 2s.co m*/ Text colq = key.getColumnQualifier(); int zeroIndex = colq.find("\0"); Text term = new Text(); term.set(colq.getBytes(), 0, zeroIndex); return term; }
From source file:org.apache.accumulo.examples.wikisearch.iterator.FieldIndexIterator.java
License:Apache License
public boolean jump(Key jumpKey) throws IOException { if (log.isDebugEnabled()) { String pEndRow = "empty"; if (parentEndRow != null) { pEndRow = parentEndRow.toString(); }//from w ww . ja v a 2 s . c o m log.debug("jump, current range: " + range + " parentEndRow is: " + pEndRow); } if (parentEndRow != null && jumpKey.getRow().compareTo(parentEndRow) > 0) { // can't go there. if (log.isDebugEnabled()) { log.debug("jumpRow: " + jumpKey.getRow() + " is greater than my parentEndRow: " + parentEndRow); } return false; } int comp; if (!this.hasTop()) { if (log.isDebugEnabled()) { log.debug("current row: " + this.currentRow); } /* * if I don't have a top, then I should be out of my range for my current row. Need to check parent range to see if I'm supposed to continue to next row * or not. Current row can be null because maybe I never found anything in this row. */ if (parentEndRow != null) { // if jumpKey row is greater than parentEndRow, stop if (jumpKey.getRow().compareTo(parentEndRow) > 0) { if (log.isDebugEnabled()) { log.debug("jumpKey row is greater than my parentEndRow, done"); } return false; } // if my current row is null, I must have hit the end of the tablet if (currentRow == null) { if (log.isDebugEnabled()) { log.debug("I have parentEndRow, but no current row, must have hit end of tablet, done"); } return false; } // if my current row is greater than jump row stop, a seek will be // called to get me going again. 
If my row is equal, but i don't // have a topkey, i'm done if (currentRow.compareTo(jumpKey.getRow()) >= 0) { if (log.isDebugEnabled()) { log.debug("I have parentEndRow, but topKey, and my currentRow is >= jumpRow, done"); } return false; } } else { // we're allowed to go to the end of the tablet // if my current row is null, I must have hit the end of the tablet if (currentRow == null) { if (log.isDebugEnabled()) { log.debug("no parentEndRow and current Row is null, must have hit end of tablet, done"); } return false; } if (currentRow.compareTo(jumpKey.getRow()) >= 0) { // i'm past or equal to the jump point and have no top, // jumping's not going to help if (log.isDebugEnabled()) { log.debug("no parentEndRow, no topKey, and currentRow is >= jumpRow, done"); } return false; } } // ok, jumpKey is ahead of me I'll mark it and allow the normal // flow to jump there and see if I have top. if (log.isDebugEnabled()) { log.debug("no topKey, but jumpRow is ahead and I'm allowed to go to it, marking"); } comp = -1; } else { // I have a topKey, I can do the normal comparisons if (log.isDebugEnabled()) { log.debug("have top, can do normal comparisons"); } comp = this.topKey.getRow().compareTo(jumpKey.getRow()); } // ------------------ // compare rows if (comp > 0) { // my row is ahead of jump key if (canBeInNextRow()) { if (log.isDebugEnabled()) { log.debug("I'm ahead of jump row & it's ok."); log.debug("jumpRow: " + jumpKey.getRow() + " myRow: " + topKey.getRow() + " parentEndRow: " + parentEndRow); } return true; } else { if (log.isDebugEnabled()) { log.debug("I'm ahead of jump row & can't be here, or at end of tablet."); } topKey = null; topValue = null; return false; } } else if (comp < 0) { // a row behind jump key, need to move forward if (log.isDebugEnabled()) { String myRow = ""; if (hasTop()) { myRow = topKey.getRow().toString(); } else if (currentRow != null) { myRow = currentRow.toString(); } log.debug("My row " + myRow + " is less than jump row: " + 
jumpKey.getRow() + " seeking"); } range = buildRange(jumpKey.getRow()); // this.seek(range, EMPTY_COL_FAMS, false); boolean success = jumpSeek(range); if (log.isDebugEnabled() && success) { log.debug("uid forced jump, found topKey: " + topKey); } if (!this.hasTop()) { log.debug("seeked with new row and had no top"); topKey = null; topValue = null; return false; } else if (parentEndRow != null && currentRow.compareTo(parentEndRow) > 0) { if (log.isDebugEnabled()) { log.debug("myRow: " + getTopKey().getRow() + " is past parentEndRow: " + parentEndRow); } topKey = null; topValue = null; return false; } if (log.isDebugEnabled()) { log.debug("jumped, valid top: " + getTopKey()); } return true; } else { // rows are equal, check the uid! keyParser.parse(topKey); String myUid = keyParser.getUid(); keyParser.parse(jumpKey); String jumpUid = keyParser.getUid(); int ucomp = myUid.compareTo(jumpUid); if (log.isDebugEnabled()) { log.debug("topKeyUid: " + myUid + " jumpUid: " + jumpUid + " myUid.compareTo(jumpUid)->" + ucomp); } if (ucomp < 0) { // need to move up log.debug("my uid is less than jumpUid, topUid: " + myUid + " jumpUid: " + jumpUid); Text cq = jumpKey.getColumnQualifier(); int index = cq.find(NULL_BYTE); if (0 <= index) { cq.set(cq.getBytes(), index + 1, cq.getLength() - index - 1); } else { log.error("Expected a NULL separator in the column qualifier"); this.topKey = null; this.topValue = null; return false; } // note my internal range stays the same, I just need to move forward Key startKey = new Key(topKey.getRow(), fName, new Text(fValue + NULL_BYTE + cq)); Key endKey = new Key(topKey.getRow(), fName, new Text(fValue + ONE_BYTE)); range = new Range(startKey, true, endKey, false); log.debug("Using range: " + range + " to seek"); // source.seek(range, EMPTY_COL_FAMS, false); boolean success = jumpSeek(range); if (log.isDebugEnabled() && success) { log.debug("uid forced jump, found topKey: " + topKey); } return success; } else { // else do nothing log.debug("my 
uid is greater than jumpUid, topKey: " + topKey + " jumpKey: " + jumpKey); log.debug("doing nothing"); } } return hasTop(); }