List of usage examples for org.apache.hadoop.io.Text.toString()
@Override
public String toString()
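Text stores its contents as UTF-8 encoded bytes, and toString() decodes them back into a Java String. As a minimal standalone sketch (the sample string is hypothetical, not taken from the examples below):

import org.apache.hadoop.io.Text;

public class TextToStringExample {
    public static void main(String[] args) {
        // Text holds its contents as UTF-8 bytes.
        Text text = new Text("hello hadoop");

        // toString() decodes the UTF-8 bytes back into a Java String.
        String decoded = text.toString();
        System.out.println(decoded); // prints "hello hadoop"
    }
}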
From source file:com.avira.couchdoop.demo.ExportMapper.java
License:Apache License
/**
 * Recommended articles are in the format:
 * session_id article_name1;score1 article_name2;score2 ...
 */
@Override
protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
    String[] tokens = value.toString().split(DELIMITER);
    String documentKey = tokens[0];

    Recommendation rec = new Recommendation();
    for (int i = 1; i < tokens.length; i++) {
        String[] recData = tokens[i].split(SECONDARY_DELIMITER);
        rec.addArticle(new RecommendedItem(recData[0], Float.parseFloat(recData[1])));
    }

    CouchbaseAction action = new CouchbaseAction(CouchbaseOperation.SET,
            JACKSON.writeValueAsString(rec));
    context.write(KEY_PREFIX + documentKey, action);
}
From source file:com.avira.couchdoop.demo.ImportMapper.java
License:Apache License
@Override
protected void map(Text key, ViewRow value, Context context)
        throws IOException, InterruptedException {
    // Extract user ID from the Couchbase key (doc ID).
    String[] tokens = key.toString().split("::");
    if (tokens.length != 4) {
        context.getCounter(Counters.INVALID_COUCHBASE_KEYS).increment(1);
        return;
    }
    String userId = tokens[3];

    // Parse Couchbase JSON document.
    String jsonString = value.getDocument().toString();
    Session session;
    try {
        session = JACKSON.readValue(jsonString, Session.class);
    } catch (IOException e) {
        context.getCounter(Counters.JSON_PARSE_ERRORS).increment(1);
        return;
    }

    // Extract article names from the JSON document.
    STRING_BUILDER.setLength(0);
    for (Session.Article article : session.getArticles()) {
        STRING_BUILDER.append(article.getName());
        STRING_BUILDER.append(DELIMITER);
    }
    STRING_BUILDER.setLength(STRING_BUILDER.length() - 1);
    String articleNames = STRING_BUILDER.toString();

    // Write output.
    OUTPUT_KEY.set(userId);
    OUTPUT_VALUE.set(articleNames);
    context.write(OUTPUT_KEY, OUTPUT_VALUE);
}
From source file:com.avira.couchdoop.exp.CsvToCouchbaseMapper.java
License:Apache License
@Override
protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
    Object doc;
    String docId;
    CouchbaseAction action;

    String[] pair = value.toString().split(fieldsDelimiter);

    // Validate number of columns against operation.
    if ((UNARY_OPERATIONS.contains(operation) && pair.length < 1)
            || (!UNARY_OPERATIONS.contains(operation) && pair.length < 2)) {
        context.getCounter(Error.LINES_WITH_WRONG_COLUMNS_COUNT).increment(1);
        return;
    }

    docId = pair[0];
    doc = (pair.length >= 2 ? pair[1] : null);
    action = new CouchbaseAction(operation, doc, expiry);

    context.write(docId, action);
}
From source file:com.avira.couchdoop.imp.CouchbaseViewToHBaseMapper.java
License:Apache License
@Override
protected void map(Text cbKey, ViewRow cbViewRow, Context context)
        throws IOException, InterruptedException {
    if (cbKey != null && cbViewRow != null && cbViewRow.getDocument() != null) {
        // Use the Couchbase key as the HBase row key.
        byte[] hRowKey = Bytes.toBytes(cbKey.toString());
        Put put = new Put(hRowKey);
        // Store the Couchbase document in the configured column.
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes(columnQualifier),
                Bytes.toBytes(cbViewRow.getDocument().toString()));
        context.write(new ImmutableBytesWritable(hRowKey), put);
    }
}
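Note: Put.add(family, qualifier, value), as used above, is deprecated in HBase 1.0 and later in favor of Put.addColumn(family, qualifier, value); the call here reflects the older HBase API this project was written against.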
From source file:com.avira.couchdoop.update.BenchmarkUpdateMapper.java
License:Apache License
@Override
protected HadoopInput<Object> transform(LongWritable hKey, Text hValue, Context context) {
    // The first tab-separated field becomes the key; the document is left null.
    String[] splits = hValue.toString().split("\t");
    return new HadoopInput<Object>(splits[0], null);
}
From source file:com.baidu.cloud.bmr.mapreduce.AccessLogAnalyzerMapper.java
License:Open Source License
@Override
protected void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
    String logLine = value.toString();
    Pattern p = Pattern.compile(regex);
    Matcher matcher = p.matcher(logLine);
    if (matcher.matches()) {
        // Group 2 is expected to hold the timestamp; keep only the date part
        // before the first ':'.
        dateTime.set(matcher.group(2).split(":")[0]);
        context.write(dateTime, one);
    }
}
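The regex field is configured elsewhere in this class and is not shown above. As a hedged standalone sketch of why matcher.group(2).split(":")[0] yields a date, assuming a hypothetical Common Log Format pattern (the real pattern may differ):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class AccessLogDemo {
    public static void main(String[] args) {
        // Hypothetical Common Log Format regex: group 1 is the host,
        // group 2 is the bracketed timestamp.
        String regex = "(\\S+) \\S+ \\S+ \\[([^\\]]+)\\] \"[^\"]*\" \\d+ \\S+";
        String logLine = "127.0.0.1 - - [10/Oct/2000:13:55:36 -0700] \"GET / HTTP/1.0\" 200 2326";

        Matcher matcher = Pattern.compile(regex).matcher(logLine);
        if (matcher.matches()) {
            // The timestamp is "10/Oct/2000:13:55:36 -0700"; splitting on ':'
            // and taking the first token keeps only the date.
            System.out.println(matcher.group(2).split(":")[0]); // prints 10/Oct/2000
        }
    }
}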
From source file:com.bark.hadoop.lab3.AdjMapper.java
@Override
protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
    /**
     * Split the line into a (key, value) pair, separated by a tab.
     */
    String[] document = value.toString().split("\t");
    /**
     * Nice names for better readability.
     */
    String myKey = document[0];
    String myValue = document[1];
    /**
     * If the pair is (A,!), write it as-is; otherwise it is (B,A) of the
     * in-graph, so write (A,B) for the out-graph.
     */
    if (myValue.equals("!")) {
        context.write(new Text(myKey), new Text(myValue));
    } else {
        context.write(new Text(myValue), new Text(myKey));
    }
}
From source file:com.bark.hadoop.lab3.AdjReducer.java
@Override
public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    // Aggregate all the pages that the current page (key) links to
    // into one tab-separated string.
    String line = "";
    for (Text t : values) {
        String tString = t.toString();
        if (!tString.equals("!")) {
            line += "\t" + tString;
        }
    }
    context.write(key, new Text(line.trim()));
}
From source file:com.bark.hadoop.lab3.PageRankMapper.java
@Override
protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
    /**
     * We have N, so we don't need to read two files.
     *
     * First iteration: read the adjacency graph; no PageRank number is found
     * (which is only true in the first iteration), so augment each line with
     * 1/N. The reducer should then output, e.g., "A 0.25 B C", i.e.
     * (src) (PageRank) (outlink)...
     *
     * Next iterations: read the adjacency graph; a number will be found (due
     * to the output format of the previous iteration's reducer).
     */
    String test = value.toString();
    test = test.replaceAll("\t", " ").replaceFirst(" ", "\t");

    double basePageRank;
    boolean hasPageRank = false;
    double pageRank = 0;

    /**
     * Pattern to distinguish our inserted numbers from numbers in titles:
     * _!(digits.digits)
     */
    Pattern pt = Pattern.compile("(_!\\d+\\.\\S+)");
    Matcher mt = pt.matcher(test);
    if (mt.find()) {
        pageRank = Double.parseDouble(mt.group(1).substring(2));
        hasPageRank = true;
    }

    int n = context.getConfiguration().getInt("N", 0);

    /**
     * If it's the first iteration, distribute 1/N among the outlinks.
     */
    if (!hasPageRank) {
        if (n == 0) {
            // 'N' was not set. Note that double division by zero does not
            // throw ArithmeticException; it silently yields Infinity, so
            // check for it explicitly.
            Logger.getLogger(PageRankMapper.class.getName()).log(Level.SEVERE,
                    "Configuration value 'N' is not set");
        } else {
            pageRank = 1d / n;
        }
    }

    /**
     * Split the input line into key and value.
     */
    String[] split = test.split("\t");

    /**
     * Emit this node's (1 - d) / N and its adjacency out-graph if not empty.
     * Damping factor d = 0.85.
     */
    basePageRank = (1 - 0.85) / n;
    String output = "_!" + basePageRank;
    if (split.length > 1) {
        // split[1] holds the outlinks string.
        String[] outlinks = split[1].split(" ");
        for (int i = hasPageRank ? 1 : 0; i < outlinks.length; i++) {
            output += " " + outlinks[i];
        }
    }
    context.write(new Text(split[0]), new Text(output.trim()));

    /**
     * Emit pageRank / |outlinks| to every outlink, if there are any. Split on
     * \t to separate the key (index 0) from the values (index 1), then split
     * the values on spaces; ignore the first token (the PageRank) unless
     * hasPageRank is false.
     */
    if (split.length > 1) {
        String[] outlinks = split[1].split(" ");
        /**
         * The input has no outlinks and only a basePageRank, which was
         * already taken care of in the previous emit, so return.
         */
        if (hasPageRank && outlinks.length == 1) {
            return;
        }
        /**
         * d = 0.85
         */
        pageRank *= 0.85;
        /**
         * Divide the PageRank over the number of outlinks.
         */
        pageRank /= hasPageRank ? (outlinks.length - 1) : outlinks.length;
        for (int i = hasPageRank ? 1 : 0; i < outlinks.length; i++) {
            context.write(new Text(outlinks[i]), new Text("_!" + pageRank));
        }
    }
}
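To make the "_!" marker format concrete, the following standalone sketch (the input line is hypothetical) shows how the pattern above pulls the PageRank value out of a line in the reducer's output format:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class PageRankMarkerDemo {
    public static void main(String[] args) {
        // A hypothetical reducer output line: title, PageRank marker, outlinks.
        String line = "PageA\t_!0.25 PageB PageC";

        // Same marker pattern as the mapper: "_!" followed by digits.digits.
        Pattern pt = Pattern.compile("(_!\\d+\\.\\S+)");
        Matcher mt = pt.matcher(line);
        if (mt.find()) {
            // substring(2) strips the leading "_!" marker.
            double pageRank = Double.parseDouble(mt.group(1).substring(2));
            System.out.println(pageRank); // prints 0.25
        }
    }
}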
From source file:com.bark.hadoop.lab3.PageRankReducer.java
@Override
public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    double newPageRank = 0;

    /**
     * Pattern to distinguish our inserted numbers from numbers in titles:
     * _!(digits.digits)
     */
    Pattern pt = Pattern.compile("(_!\\d+\\.\\S+)");

    String outGraph = "";
    for (Text t : values) {
        String s = t.toString();
        String[] ss = s.split(" ");
        /**
         * If the value has more than one element, the rest are outlinks;
         * keep them.
         */
        if (ss.length > 1) {
            for (int i = 1; i < ss.length; i++) {
                outGraph += " " + ss[i];
            }
        }
        /**
         * Read the PageRanks and sum them up.
         */
        Matcher mt = pt.matcher(s);
        if (mt.find()) {
            newPageRank += Double.parseDouble(mt.group(1).substring(2));
        }
    }

    // Make sure not to include extra spaces (especially if outGraph is empty):
    //   "_!0.0000 A B C" with outlinks,
    //   "_!0.0000" without.
    context.write(key, new Text(("_!" + newPageRank + " " + outGraph.trim()).trim()));
}