List of usage examples for the find method of org.apache.hadoop.io.Text
public int find(String what)
From source file:com.lovelysystems.hive.udf.UnescapeXMLUDF.java
License:Apache License
public Text evaluate(final Text s) { if (s == null) { return null; } else if (s.find("&") == -1) { res.set(s);//from www . ja v a 2s .c o m } else { res.set(s.toString()); } return res; }
From source file:com.pagerankcalculator.calculation.PageRankCalculationMapper.java
@Override public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { int tabIdx1 = value.find("\t"); int tabIdx2 = value.find("\t", tabIdx1 + 1); String userID = Text.decode(value.getBytes(), 0, tabIdx1); String pageRank = Text.decode(value.getBytes(), tabIdx1 + 1, tabIdx2 - (tabIdx1 + 1)); String CSVFollowingIDs = Text.decode(value.getBytes(), tabIdx2 + 1, value.getLength() - (tabIdx2 + 1)); // System.out.print(userID); // System.out.print("\t"); // System.out.print(pageRank); // System.out.print("\t"); // System.out.println(CSVFollowingIDs); String[] followingIDs = CSVFollowingIDs.split(TwitterPageRank.FOLLOWING_LIST_DELIMETER); Integer totalFollowingIDs = followingIDs.length; for (String followingID : followingIDs) { String pageRankWithTotalFollowing = pageRank + "\t" + totalFollowingIDs.toString(); context.write(new Text(followingID), new Text(pageRankWithTotalFollowing)); }/*from www.j a v a 2s. c o m*/ context.write(new Text(userID), new Text(TwitterPageRank.FOLLOWING_LIST_TAG + CSVFollowingIDs)); }
From source file:com.pagerankcalculator.graphparsing.GraphParsingMapper.java
/**
 * Splits the input line at its first tab and emits the pair swapped: the text after the tab
 * becomes the output key and the text before the tab becomes the output value.
 *
 * <p>The decoded halves are stored in the {@code userID} and {@code followerID} members before
 * being written.
 *
 * @param key     input key, not used by this method
 * @param value   a line containing a tab separator
 * @param context sink for the emitted pair
 * @throws IOException          if decoding or writing fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    final int tabAt = value.find("\t");
    final int afterTab = tabAt + 1;

    // find() reports a byte position, so slice the backing bytes rather than a String.
    userID = Text.decode(value.getBytes(), 0, tabAt);
    followerID = Text.decode(value.getBytes(), afterTab, value.getLength() - afterTab);

    final Text outKey = new Text(followerID);
    final Text outValue = new Text(userID);
    context.write(outKey, outValue);
}
From source file:com.pagerankcalculator.ordering.PageRankSortingMapper.java
/**
 * Reads the first two tab-separated fields of the input line and emits them swapped, as
 * {@code (pageRank, username)}, with the page rank parsed as a {@code double}.
 *
 * <p>NOTE(review): there is no check for a missing tab. {@code find} yields -1 when nothing is
 * found, in which case the offsets handed to {@code Text.decode} are not meaningful -- confirm
 * the input always has at least two tabs.
 *
 * @param key     input key, not used by this method
 * @param value   a line of the form {@code username \t pageRank \t ...}
 * @param context sink for the emitted pair
 * @throws IOException           if decoding or writing fails
 * @throws InterruptedException  if writing to the context is interrupted
 * @throws NumberFormatException if the second field is not a parsable double
 */
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    int tabIdx1 = value.find("\t");
    int tabIdx2 = value.find("\t", tabIdx1 + 1);

    String username = Text.decode(value.getBytes(), 0, tabIdx1);

    // Parse straight to a primitive; the boxed Double(String) constructor is deprecated and the
    // box would be discarded immediately when building the DoubleWritable.
    double pageRank = Double.parseDouble(Text.decode(value.getBytes(), tabIdx1 + 1, tabIdx2 - (tabIdx1 + 1)));

    context.write(new DoubleWritable(pageRank), new Text(username));
}
From source file:cosmos.impl.IndexToMultimapRecord.java
License:Apache License
/**
 * Extracts the document id that follows the first {@code Defaults.NULL_BYTE_STR} in the entry
 * key's column qualifier and returns {@code sorts.contents(id, docId)} for it.
 *
 * @param input the entry whose key carries the column qualifier to parse
 * @return the result of {@code sorts.contents(id, docId)}
 * @throws RuntimeException if the qualifier contains no {@code Defaults.NULL_BYTE_STR}, or
 *                          wrapping any {@code TableNotFoundException},
 *                          {@code UnexpectedStateException} or {@code CharacterCodingException}
 *                          raised while decoding or fetching the contents
 */
@Override
public MultimapRecord apply(Entry<Key, Value> input) {
    final Key entryKey = input.getKey();
    final Text qualifier = entryKey.getColumnQualifier();

    final int separatorAt = qualifier.find(Defaults.NULL_BYTE_STR);
    if (separatorAt == -1) {
        throw new RuntimeException("Was provided unexpected Key: " + entryKey);
    }

    // The document id is everything after the separator, measured in bytes.
    final int docIdOffset = separatorAt + 1;
    final int docIdLength = qualifier.getLength() - docIdOffset;

    try {
        final String docId = Text.decode(qualifier.getBytes(), docIdOffset, docIdLength);
        return sorts.contents(id, docId);
    } catch (TableNotFoundException e) {
        throw new RuntimeException(e);
    } catch (UnexpectedStateException e) {
        throw new RuntimeException(e);
    } catch (CharacterCodingException e) {
        throw new RuntimeException(e);
    }
}
From source file:crunch.MaxTemperature.java
License:Apache License
/**
 * Checks that {@code Text} reports length and {@code find} positions as byte offsets, and that
 * {@code charAt} at those offsets returns the full code point, for a string mixing characters of
 * one, two, three and four encoded bytes.
 */
@Test
public void text() {
    Text t = new Text("\u0041\u00DF\u6771\uD801\uDC00");

    // 1 + 2 + 3 + 4 encoded bytes.
    assertThat(t.getLength(), is(10));

    // find() answers with the byte offset at which each character starts.
    assertThat(t.find("\u0041"), is(0));
    assertThat(t.find("\u00DF"), is(1));
    assertThat(t.find("\u6771"), is(3));
    assertThat(t.find("\uD801\uDC00"), is(6));

    // charAt() takes a byte offset and yields the code point found there.
    assertThat(t.charAt(0), is(0x0041));
    assertThat(t.charAt(1), is(0x00DF));
    assertThat(t.charAt(3), is(0x6771));
    assertThat(t.charAt(6), is(0x10400));
}
From source file:crunch.MaxTemperature.java
License:Apache License
@Test public void find() throws IOException { // vv TextTest-Find Text t = new Text("hadoop"); assertThat("Find a substring", t.find("do"), is(2)); assertThat("Finds first 'o'", t.find("o"), is(3)); assertThat("Finds 'o' from position 4 or later", t.find("o", 4), is(4)); assertThat("No match", t.find("pig"), is(-1)); // ^^ TextTest-Find }//from w ww .j a v a 2s .c o m
From source file:crunch.MaxTemperature.java
License:Apache License
/**
 * Contrasts {@code String} indexing (by {@code char}) with {@code Text} indexing (by encoded
 * byte offset) on a sample holding one character each of one, two, three and four UTF-8 bytes,
 * the last of which is a surrogate pair in the {@code String}.
 */
@Test
public void withSupplementaryCharacters() throws IOException {
    // --- java.lang.String: positions count chars, the surrogate pair counts twice ---
    String s = "\u0041\u00DF\u6771\uD801\uDC00";
    assertThat(s.length(), is(5));
    assertThat(s.getBytes("UTF-8").length, is(10));

    assertThat(s.indexOf('\u0041'), is(0));
    assertThat(s.indexOf('\u00DF'), is(1));
    assertThat(s.indexOf('\u6771'), is(2));
    assertThat(s.indexOf('\uD801'), is(3));
    assertThat(s.indexOf('\uDC00'), is(4));

    assertThat(s.charAt(0), is('\u0041'));
    assertThat(s.charAt(1), is('\u00DF'));
    assertThat(s.charAt(2), is('\u6771'));
    assertThat(s.charAt(3), is('\uD801'));
    assertThat(s.charAt(4), is('\uDC00'));

    // --- org.apache.hadoop.io.Text: find() and charAt() agree on offsets ---
    Text t = new Text("\u0041\u00DF\u6771\uD801\uDC00");
    assertThat(serializeToString(t), is("0a41c39fe69db1f0909080"));

    assertThat(t.charAt(t.find("\u0041")), is(0x0041));
    assertThat(t.charAt(t.find("\u00DF")), is(0x00DF));
    assertThat(t.charAt(t.find("\u6771")), is(0x6771));
    assertThat(t.charAt(t.find("\uD801\uDC00")), is(0x10400));
}
From source file:edu.uci.ics.fuzzyjoin.hadoop.recordpairs.MapBasicJoin.java
License:Apache License
public void map(Object unused, Text record, OutputCollector<IntTripleWritable, Text> output, Reporter reporter) throws IOException { String recordString = record.toString(); if (record.find("" + FuzzyJoinConfig.RECORD_SEPARATOR) >= 0) { /*/*from w ww . j a va2s . c o m*/ * VALUE1: RID:Record * * KEY2: 0/1 (0: Relation R, 1: Relation S), RID, 0 * * VALUE2: Record */ String valueSplit[] = recordString.split(FuzzyJoinConfig.RECORD_SEPARATOR_REGEX); int relation = 0; if (reporter.getInputSplit().toString().contains(suffixSecond)) { relation = 1; } outputKey.set(relation, Integer.valueOf(valueSplit[FuzzyJoinConfig.RECORD_KEY]), 0); outputValue.set(record); output.collect(outputKey, outputValue); } else { /* * VALUE1: "RID-R RID-S Similarity" * * KEY2: 0/1 (0: Relation R, 1: Relation S), RID, 1 * * VALUE2: "RIDOther Similarity" */ String valueSplit[] = recordString.split(FuzzyJoinConfig.RIDPAIRS_SEPARATOR_REGEX); outputKey.set(0, Integer.parseInt(valueSplit[0]), 1); outputValue.set(valueSplit[1] + FuzzyJoinConfig.RIDPAIRS_SEPARATOR + valueSplit[2]); output.collect(outputKey, outputValue); outputKey.set(1, Integer.parseInt(valueSplit[1]), 1); outputValue.set(valueSplit[0] + FuzzyJoinConfig.RIDPAIRS_SEPARATOR + valueSplit[2]); output.collect(outputKey, outputValue); } }
From source file:edu.uci.ics.fuzzyjoin.hadoop.recordpairs.MapBasicSelfJoin.java
License:Apache License
public void map(Object unused, Text inputValue, OutputCollector<IntPairWritable, Text> output, Reporter reporter) throws IOException { String recordString = inputValue.toString(); if (inputValue.find("" + FuzzyJoinConfig.RECORD_SEPARATOR) >= 0) { /*/*from w ww. j a va 2 s .c om*/ * VALUE1: RID:Record * * KEY2: RID, 0 * * VALUE2: Record */ String valueSplit[] = recordString.split(FuzzyJoinConfig.RECORD_SEPARATOR_REGEX); outputKey.set(Integer.valueOf(valueSplit[FuzzyJoinConfig.RECORD_KEY]), 0); outputValue.set(inputValue); output.collect(outputKey, outputValue); } else { /* * VALUE1: "RID1 RID2 Similarity" * * KEY2: RID1, 1 and RID2, 1 * * VALUE2: "RID2 Similarity" and "RID1 Similarity" */ String valueSplit[] = recordString.split(FuzzyJoinConfig.RIDPAIRS_SEPARATOR_REGEX); outputKey.set(Integer.parseInt(valueSplit[0]), 1); outputValue.set(valueSplit[1] + FuzzyJoinConfig.RIDPAIRS_SEPARATOR + valueSplit[2]); output.collect(outputKey, outputValue); outputKey.set(Integer.parseInt(valueSplit[1]), 1); outputValue.set(valueSplit[0] + FuzzyJoinConfig.RIDPAIRS_SEPARATOR + valueSplit[2]); output.collect(outputKey, outputValue); } }