List of usage examples for org.apache.hadoop.io.Text.toString()
@Override
public String toString()
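Text stores its contents as UTF-8 encoded bytes, and toString() decodes them back into a Java String. As a minimal standalone sketch (the sample string is hypothetical, not taken from the examples below):

import org.apache.hadoop.io.Text;

public class TextToStringExample {
    public static void main(String[] args) {
        // Text holds its contents as UTF-8 bytes.
        Text text = new Text("hello hadoop");

        // toString() decodes the UTF-8 bytes back into a Java String.
        String decoded = text.toString();
        System.out.println(decoded); // prints "hello hadoop"
    }
}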
From source file:com.avira.couchdoop.demo.ExportMapper.java
License:Apache License
/**
 * Recommended articles are in the format:
 * session_id article_name1;score1 article_name2;score2 ...
 */
@Override
protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
    String[] tokens = value.toString().split(DELIMITER);
    String documentKey = tokens[0];

    Recommendation rec = new Recommendation();
    for (int i = 1; i < tokens.length; i++) {
        String[] recData = tokens[i].split(SECONDARY_DELIMITER);
        rec.addArticle(new RecommendedItem(recData[0], Float.parseFloat(recData[1])));
    }

    CouchbaseAction action = new CouchbaseAction(CouchbaseOperation.SET,
            JACKSON.writeValueAsString(rec));
    context.write(KEY_PREFIX + documentKey, action);
}
From source file:com.avira.couchdoop.demo.ImportMapper.java
License:Apache License
@Override
protected void map(Text key, ViewRow value, Context context)
        throws IOException, InterruptedException {
    // Extract user ID from the Couchbase key (doc ID).
    String[] tokens = key.toString().split("::");
    if (tokens.length != 4) {
        context.getCounter(Counters.INVALID_COUCHBASE_KEYS).increment(1);
        return;
    }
    String userId = tokens[3];

    // Parse Couchbase JSON document.
    String jsonString = value.getDocument().toString();
    Session session;
    try {
        session = JACKSON.readValue(jsonString, Session.class);
    } catch (IOException e) {
        context.getCounter(Counters.JSON_PARSE_ERRORS).increment(1);
        return;
    }

    // Extract article names from the JSON document.
    STRING_BUILDER.setLength(0);
    for (Session.Article article : session.getArticles()) {
        STRING_BUILDER.append(article.getName());
        STRING_BUILDER.append(DELIMITER);
    }
    STRING_BUILDER.setLength(STRING_BUILDER.length() - 1);
    String articleNames = STRING_BUILDER.toString();

    // Write output.
    OUTPUT_KEY.set(userId);
    OUTPUT_VALUE.set(articleNames);
    context.write(OUTPUT_KEY, OUTPUT_VALUE);
}
From source file:com.avira.couchdoop.exp.CsvToCouchbaseMapper.java
License:Apache License
@Override
protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
    Object doc;
    String docId;
    CouchbaseAction action;

    String[] pair = value.toString().split(fieldsDelimiter);

    // Validate number of columns against operation.
    if ((UNARY_OPERATIONS.contains(operation) && pair.length < 1)
            || (!UNARY_OPERATIONS.contains(operation) && pair.length < 2)) {
        context.getCounter(Error.LINES_WITH_WRONG_COLUMNS_COUNT).increment(1);
        return;
    }

    docId = pair[0];
    doc = (pair.length >= 2 ? pair[1] : null);
    action = new CouchbaseAction(operation, doc, expiry);

    context.write(docId, action);
}
From source file:com.avira.couchdoop.imp.CouchbaseViewToHBaseMapper.java
License:Apache License
@Override
protected void map(Text cbKey, ViewRow cbViewRow, Context context)
        throws IOException, InterruptedException {
    if (cbKey != null && cbViewRow != null && cbViewRow.getDocument() != null) {
        // Use the Couchbase key as the HBase row key.
        byte[] hRowKey = Bytes.toBytes(cbKey.toString());
        Put put = new Put(hRowKey);
        // Store the Couchbase document in the configured column.
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes(columnQualifier),
                Bytes.toBytes(cbViewRow.getDocument().toString()));
        context.write(new ImmutableBytesWritable(hRowKey), put);
    }
}
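Note: Put.add(family, qualifier, value), as used above, is deprecated in HBase 1.0 and later in favor of Put.addColumn(family, qualifier, value); the call here reflects the older HBase API this project was written against.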
From source file:com.avira.couchdoop.update.BenchmarkUpdateMapper.java
License:Apache License
@Override
protected HadoopInput<Object> transform(LongWritable hKey, Text hValue, Context context) {
    // The first tab-separated field becomes the key; the document is left null.
    String[] splits = hValue.toString().split("\t");
    return new HadoopInput<Object>(splits[0], null);
}
From source file:com.baidu.cloud.bmr.mapreduce.AccessLogAnalyzerMapper.java
License:Open Source License
@Override
protected void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
    String logLine = value.toString();
    Pattern p = Pattern.compile(regex);
    Matcher matcher = p.matcher(logLine);
    if (matcher.matches()) {
        // Group 2 is expected to hold the timestamp; keep only the date part
        // before the first ':'.
        dateTime.set(matcher.group(2).split(":")[0]);
        context.write(dateTime, one);
    }
}
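The regex field is configured elsewhere in this class and is not shown above. As a hedged standalone sketch of why matcher.group(2).split(":")[0] yields a date, assuming a hypothetical Common Log Format pattern (the real pattern may differ):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class AccessLogDemo {
    public static void main(String[] args) {
        // Hypothetical Common Log Format regex: group 1 is the host,
        // group 2 is the bracketed timestamp.
        String regex = "(\\S+) \\S+ \\S+ \\[([^\\]]+)\\] \"[^\"]*\" \\d+ \\S+";
        String logLine = "127.0.0.1 - - [10/Oct/2000:13:55:36 -0700] \"GET / HTTP/1.0\" 200 2326";

        Matcher matcher = Pattern.compile(regex).matcher(logLine);
        if (matcher.matches()) {
            // The timestamp is "10/Oct/2000:13:55:36 -0700"; splitting on ':'
            // and taking the first token keeps only the date.
            System.out.println(matcher.group(2).split(":")[0]); // prints 10/Oct/2000
        }
    }
}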
From source file:com.bark.hadoop.lab3.AdjMapper.java
@Override
protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
    /**
     * Split the line into a (key, value) pair, separated by a tab.
     */
    String[] document = value.toString().split("\t");
    /**
     * Nice names for better readability.
     */
    String myKey = document[0];
    String myValue = document[1];
    /**
     * If the pair is (A,!), write it as-is; otherwise it is (B,A) of the
     * in-graph, so write (A,B) for the out-graph.
     */
    if (myValue.equals("!")) {
        context.write(new Text(myKey), new Text(myValue));
    } else {
        context.write(new Text(myValue), new Text(myKey));
    }
}
From source file:com.bark.hadoop.lab3.AdjReducer.java
@Override
public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    // Aggregate all the pages that the current page (key) links to
    // into one tab-separated string.
    String line = "";
    for (Text t : values) {
        String tString = t.toString();
        if (!tString.equals("!")) {
            line += "\t" + tString;
        }
    }
    context.write(key, new Text(line.trim()));
}
From source file:com.bark.hadoop.lab3.PageRankMapper.java
@Override
protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
    /**
     * We have N, so we don't need to read two files.
     *
     * First iteration: read the adjacency graph; no PageRank number is found
     * (which is only true in the first iteration), so augment each line with
     * 1/N. The reducer should then output, e.g., "A 0.25 B C", i.e.
     * (src) (PageRank) (outlink)...
     *
     * Next iterations: read the adjacency graph; a number will be found (due
     * to the output format of the previous iteration's reducer).
     */
    String test = value.toString();
    test = test.replaceAll("\t", " ").replaceFirst(" ", "\t");

    double basePageRank;
    boolean hasPageRank = false;
    double pageRank = 0;

    /**
     * Pattern to distinguish our inserted numbers from numbers in titles:
     * _!(digits.digits)
     */
    Pattern pt = Pattern.compile("(_!\\d+\\.\\S+)");
    Matcher mt = pt.matcher(test);
    if (mt.find()) {
        pageRank = Double.parseDouble(mt.group(1).substring(2));
        hasPageRank = true;
    }

    int n = context.getConfiguration().getInt("N", 0);

    /**
     * If it's the first iteration, distribute 1/N among the outlinks.
     */
    if (!hasPageRank) {
        if (n == 0) {
            // 'N' was not set. Note that double division by zero does not
            // throw ArithmeticException; it silently yields Infinity, so
            // check for it explicitly.
            Logger.getLogger(PageRankMapper.class.getName()).log(Level.SEVERE,
                    "Configuration value 'N' is not set");
        } else {
            pageRank = 1d / n;
        }
    }

    /**
     * Split the input line into key and value.
     */
    String[] split = test.split("\t");

    /**
     * Emit this node's (1 - d) / N and its adjacency out-graph if not empty.
     * Damping factor d = 0.85.
     */
    basePageRank = (1 - 0.85) / n;
    String output = "_!" + basePageRank;
    if (split.length > 1) {
        // split[1] holds the outlinks string.
        String[] outlinks = split[1].split(" ");
        for (int i = hasPageRank ? 1 : 0; i < outlinks.length; i++) {
            output += " " + outlinks[i];
        }
    }
    context.write(new Text(split[0]), new Text(output.trim()));

    /**
     * Emit pageRank / |outlinks| to every outlink, if there are any. Split on
     * \t to separate the key (index 0) from the values (index 1), then split
     * the values on spaces; ignore the first token (the PageRank) unless
     * hasPageRank is false.
     */
    if (split.length > 1) {
        String[] outlinks = split[1].split(" ");
        /**
         * The input has no outlinks and only a basePageRank, which was
         * already taken care of in the previous emit, so return.
         */
        if (hasPageRank && outlinks.length == 1) {
            return;
        }
        /**
         * d = 0.85
         */
        pageRank *= 0.85;
        /**
         * Divide the PageRank over the number of outlinks.
         */
        pageRank /= hasPageRank ? (outlinks.length - 1) : outlinks.length;
        for (int i = hasPageRank ? 1 : 0; i < outlinks.length; i++) {
            context.write(new Text(outlinks[i]), new Text("_!" + pageRank));
        }
    }
}
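To make the "_!" marker format concrete, the following standalone sketch (the input line is hypothetical) shows how the pattern above pulls the PageRank value out of a line in the reducer's output format:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class PageRankMarkerDemo {
    public static void main(String[] args) {
        // A hypothetical reducer output line: title, PageRank marker, outlinks.
        String line = "PageA\t_!0.25 PageB PageC";

        // Same marker pattern as the mapper: "_!" followed by digits.digits.
        Pattern pt = Pattern.compile("(_!\\d+\\.\\S+)");
        Matcher mt = pt.matcher(line);
        if (mt.find()) {
            // substring(2) strips the leading "_!" marker.
            double pageRank = Double.parseDouble(mt.group(1).substring(2));
            System.out.println(pageRank); // prints 0.25
        }
    }
}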
From source file:com.bark.hadoop.lab3.PageRankReducer.java
@Override
public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    double newPageRank = 0;

    /**
     * Pattern to distinguish our inserted numbers from numbers in titles:
     * _!(digits.digits)
     */
    Pattern pt = Pattern.compile("(_!\\d+\\.\\S+)");

    String outGraph = "";
    for (Text t : values) {
        String s = t.toString();
        String[] ss = s.split(" ");
        /**
         * If the value has more than one element, the rest are outlinks;
         * keep them.
         */
        if (ss.length > 1) {
            for (int i = 1; i < ss.length; i++) {
                outGraph += " " + ss[i];
            }
        }
        /**
         * Read the PageRanks and sum them up.
         */
        Matcher mt = pt.matcher(s);
        if (mt.find()) {
            newPageRank += Double.parseDouble(mt.group(1).substring(2));
        }
    }

    // Make sure not to include extra spaces (especially if outGraph is empty):
    //   "_!0.0000 A B C" with outlinks,
    //   "_!0.0000" without.
    context.write(key, new Text(("_!" + newPageRank + " " + outGraph.trim()).trim()));
}