Example usage for org.apache.hadoop.io Text toString

List of usage examples for org.apache.hadoop.io Text toString

Introduction

On this page you can find example usages of org.apache.hadoop.io Text toString.

Prototype

@Override
public String toString() 

Source Link

Document

Convert text back to string

Usage

From source file:com.justgiving.raven.kissmetrics.jsonenricher.KissmetricsJsonToEnrichedJsonMapper.java

License:Open Source License

/**
 * Parses one raw Kissmetrics JSON row, enriches it via
 * {@code KissmetricsRowParser.parseJsonRowToValidJson} and writes the result.
 *
 * <p>Rows that parse to a wrapper with a non-null key, a non-null JSON row and a
 * {@code VALID_JSON_ROW} return code are written to the context. Every other row is
 * reported on the logger and stdout and counted as {@code INVALID_JSON_ROW}; rows whose
 * monitoring code is {@code INVALID_DATE} additionally bump the {@code INVALID_DATE}
 * counter.
 *
 * @param rowKey     input key (not used by this method)
 * @param rawJsonRow raw JSON row to parse
 * @param context    task context used for the input split, counters and output
 */
@Override
public void map(LongWritable rowKey, Text rawJsonRow, Context context)
        throws IOException, InterruptedException {
    final Logger logger = Logger.getLogger(KissmetricsJsonToEnrichedJsonMapper.class);

    // Best effort: fall back to empty strings when the split is not a FileSplit.
    String fileNameInputToMapper = "";
    String filePath = "";
    try {
        FileSplit split = (FileSplit) context.getInputSplit();
        fileNameInputToMapper = split.getPath().getName();
        filePath = split.getPath().toString();
    } catch (Exception e) {
        logger.info("unable to get file inputpath");
        System.out.println("unable to get file inputpath");
    }

    //TODO: String capturedDate = getCurrentDate();
    KeyRowWrapper keyRow = KissmetricsRowParser.parseJsonRowToValidJson(rawJsonRow, fileNameInputToMapper,
            filePath);

    boolean isValidRow = keyRow != null && keyRow.getKey() != null && keyRow.getJsonrow() != null
            && keyRow.getReturnCode() == TRACKING_COUNTER.VALID_JSON_ROW;

    if (isValidRow) {
        context.getCounter(TRACKING_COUNTER.VALID_JSON_ROW).increment(1);

        //TODO: Monitoring use of Octal Decoder
        if (keyRow.getMonitoringCode() == TRACKING_COUNTER.OCTAL_PARSING_NEEDED) {
            context.getCounter(TRACKING_COUNTER.OCTAL_PARSING_NEEDED).increment(1);
        }

        context.write(new Text(keyRow.getKey()), new Text(keyRow.getJsonrow()));
        return;
    }

    // Everything below handles a skipped row: null wrapper, invalid date, or any other failure.
    boolean hasInvalidDate = keyRow != null
            && keyRow.getMonitoringCode() == TRACKING_COUNTER.INVALID_DATE;

    logger.info("Error on row parsing, skipping row");
    System.out.println("Error on row parsing");
    System.out.println("Skipped row with contents: " + rawJsonRow.toString());
    System.out.println("--------------------------------");
    context.getCounter(TRACKING_COUNTER.INVALID_JSON_ROW).increment(1);
    if (hasInvalidDate) {
        context.getCounter(TRACKING_COUNTER.INVALID_DATE).increment(1);
    }
}

From source file:com.justgiving.raven.kissmetrics.schema.KissmetricsJsonToSchemaMapper.java

License:Open Source License

/**
 * Emits one record per top-level key of the JSON object contained in {@code value}.
 *
 * <p>The output key is the JSON key; the output value is {@code "1\t<len>"}, where
 * {@code <len>} is the length of the value's string form. A JSON {@code null} value is
 * treated as the empty string (length 0) instead of failing the task with a
 * {@link NullPointerException}.
 *
 * <p>Rows that fail to parse are reported via the stack trace and skipped.
 *
 * @param key     input key (not used by this method)
 * @param value   one JSON object serialized as text
 * @param context task context that receives the emitted records
 */
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    JSONParser jsonParser = new JSONParser();
    try {
        JSONObject jsonObject = (JSONObject) jsonParser.parse(value.toString());
        Set<String> keyset = jsonObject.keySet();
        for (String jsonkey : keyset) {
            // The key can be present with a null value, so guard before calling toString().
            Object rawValue = jsonObject.get(jsonkey);
            String jsonValue = (rawValue == null) ? "" : rawValue.toString();
            context.write(new Text(jsonkey), new Text("1\t" + jsonValue.length()));
        }
    } catch (ParseException e) {
        // Malformed rows are deliberately skipped; the trace is the only record of them.
        e.printStackTrace();
    }
}

From source file:com.justgiving.raven.kissmetrics.schema.KissmetricsJsonToSchemaReducer.java

License:Open Source License

/**
 * Aggregates the tab-separated values emitted for one key.
 *
 * <p>Each incoming value is split on tab: the first field is added to a running total and,
 * when a second field is present, it is compared against the largest second field seen so
 * far. The result is written as {@code "<total>\t<max>"}.
 *
 * @param key     key shared by all {@code values}
 * @param values  tab-separated records to aggregate
 * @param context task context that receives the aggregated record
 */
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    int occurrences = 0;
    int longest = 0;

    for (Text value : values) {
        String[] fields = value.toString().split("\t");
        occurrences += Integer.parseInt(fields[0]);
        if (fields.length > 1) {
            longest = Math.max(longest, Integer.parseInt(fields[1]));
        }
    }

    context.write(key, new Text(occurrences + "\t" + longest));
}

From source file:com.justgiving.raven.kissmetrics.utils.KissmetricsRowParser.java

License:Open Source License

/***
 * Used to parse, escape and enrich Kissmetircs Json records
 * /*from www  .  j  a  v  a  2 s  . c  o m*/
 * @param rawJsonRow
 * @param fileNameInputToMapper
 * @return
 */
public static KeyRowWrapper parseJsonRowToValidJson(Text rawJsonRow, String fileNameInputToMapper,
        String filePath) {

    String jsonString = "";
    boolean wasOctalParsingNeeded = false;

    try {
        System.setProperty("file.encoding", "UTF-8");
        s = rawJsonRow.toString();
        Charset charSet = Charset.forName("UTF-8");
        byte[] encoded = s.getBytes(charSet);
        decodedStrRaw = new String(encoded, charSet);

        // Test new Apache Lang3
        // decodedStr = StringEscapeUtils.unescapeJava(decodedStr);

        //Replace any remaining Octal encoded Characters
        decodedStrParsed = replaceOctalUft8Char(decodedStrRaw);
        if (decodedStrParsed.compareTo(decodedStrRaw) == 0) {
            wasOctalParsingNeeded = false;
        } else {
            wasOctalParsingNeeded = true;
        }

        if (decodedStrParsed != null && decodedStrParsed != "") {
            JSONObject jsonObject = (JSONObject) jsonParser.parse(decodedStrParsed);

            // get email and user_id
            if (jsonObject.get("_p2") != null) {
                p2 = jsonObject.get("_p2").toString().toLowerCase();
                if (p2.contains("@")) {
                    jsonObject.put("user_email", p2);
                    jsonObject.put("user_email_back", p2);
                } else if (p2 != null && p2.length() > 0) {
                    jsonObject.put("user_km_id", p2);
                }
            }
            // get email and user_id
            if (jsonObject.get("_p") != null) {
                p = jsonObject.get("_p").toString().toLowerCase();
                if (p.contains("@")) {
                    jsonObject.put("user_email", p);
                    jsonObject.put("user_email_back", p);
                } else if (p != null && p.length() > 0) {
                    jsonObject.put("user_km_id", p);
                }
            }

            // Add Event
            if (jsonObject.get("_n") != null) {
                event = jsonObject.get("_n").toString();
                if (event != null) {
                    jsonObject.put("event", event);
                }
            }

            // add unix timestamp and datetime
            long currentDateTime = System.currentTimeMillis();
            Date currentDate = new Date(currentDateTime);
            if (jsonObject.get("_t") == null) {
                return (new KeyRowWrapper(jsonString, null, TRACKING_COUNTER.INVALID_JSON_ROW,
                        TRACKING_COUNTER.INVALID_DATE));
            }
            long kmTimeDateMilliSeconds;
            long kmTimeDateMilliSecondsMobile;
            try {
                tTimestampValue = (String) jsonObject.get("_t").toString();

                //See if new record with server timestamp
                if (jsonObject.get("_server_timestamp") != null) {
                    serverTimestampValue = (String) jsonObject.get("_server_timestamp").toString();
                } else {
                    serverTimestampValue = "0";
                }

                //Deal with mobile timedate cases
                if (jsonObject.get("_c") != null) {
                    if (serverTimestampValue.equals("0")) {
                        timestampValueOutput = tTimestampValue;
                        kmTimeDateMilliSecondsMobile = 0;
                    } else {
                        timestampValueOutput = serverTimestampValue;
                        mobileTimestampValueOutput = tTimestampValue;
                        jsonObject.put("km_timestamp_mobile", mobileTimestampValueOutput);
                        kmTimeDateMilliSecondsMobile = Long.parseLong(mobileTimestampValueOutput) * 1000;
                    }
                } else {//Ignore server time
                        //TODO Need a way to resolve mobile identify events
                    serverTimestampValue = "0";
                    timestampValueOutput = tTimestampValue;
                    kmTimeDateMilliSecondsMobile = 0;
                }

                jsonObject.put("km_timestamp", timestampValueOutput);
                kmTimeDateMilliSeconds = Long.parseLong(timestampValueOutput) * 1000;
            } catch (Exception e) {
                return (new KeyRowWrapper(jsonString, timestampValueOutput, TRACKING_COUNTER.INVALID_JSON_ROW,
                        TRACKING_COUNTER.INVALID_DATE));
            }
            Calendar calendar = Calendar.getInstance();
            calendar.setTimeInMillis(kmTimeDateMilliSeconds);
            String event_timedate = dateFormatter.format(calendar.getTime());
            jsonObject.put("event_timedate", event_timedate);

            if (kmTimeDateMilliSecondsMobile > 0) {
                calendar.setTimeInMillis(kmTimeDateMilliSecondsMobile);
                String event_timedate_mobile = dateFormatter.format(calendar.getTime());
                jsonObject.put("event_timedate_mobile", event_timedate_mobile);
            }

            // add Map Reduce json_filename
            jsonObject.put("filename", fileNameInputToMapper);
            jsonString = jsonObject.toString();

            //Add bucket path
            jsonObject.put("bucket", filePath);
            jsonString = jsonObject.toString();

            // TODO add the time the record was processed by Mapper:
            //jsonObject.put("capturedDate", capturedDate);
            //jsonString = jsonObject.toString();

            return (new KeyRowWrapper(jsonString, timestampValueOutput, TRACKING_COUNTER.VALID_JSON_ROW,
                    wasOctalParsingNeeded ? null : TRACKING_COUNTER.OCTAL_PARSING_NEEDED));

        }

    } catch (Exception e) {
        // System.err.println(e.getMessage());
        // e.printStackTrace();
        StringWriter errors = new StringWriter();
        e.printStackTrace(new PrintWriter(errors));
        logger.error(errors.toString());

        logger.error("log - file " + fileNameInputToMapper);
        System.out.println("file " + fileNameInputToMapper);

        logger.error("log - row content: " + rawJsonRow.toString().replace("\t", ""));
        System.err.println("row content: " + rawJsonRow.toString().replace("\t", ""));

        System.err.println("Error skipping row");
        logger.error("Log - Error skipping row");
    }
    return null;
}

From source file:com.kasabi.labs.freebase.mr.Freebase2RDFMapper.java

License:Apache License

/**
 * Splits one tab-separated input line and dispatches it to the matching helper before
 * emitting the shared key/value buffers.
 *
 * <ul>
 *   <li>3 non-blank tokens: {@code resource};</li>
 *   <li>4 tokens with a blank third token: {@code literal};</li>
 *   <li>4 tokens whose third token starts with {@code LANG}: {@code literal_lang}, using
 *       the text after the last {@code '/'} of that token;</li>
 *   <li>4 tokens whose second token equals {@code OBJECT_KEY}, or equals
 *       {@code OBJECT_NAME} with a third token starting with {@code GUID}:
 *       {@code literal2};</li>
 *   <li>anything else is logged as a warning and ignored.</li>
 * </ul>
 *
 * @param key     input key (only logged)
 * @param value   tab-separated input line
 * @param context task context passed to {@code emit}
 */
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    log.debug("< ({}, {})", key, value);

    k.clear();
    v.clear();

    final String[] tokens = value.toString().split("\\t");
    final int count = tokens.length;

    // split() yields an empty array for a line made only of tabs; such lines are ignored silently.
    if (count > 0) {
        final boolean subjectAndPredicatePresent = count >= 2
                && !tokens[0].trim().isEmpty()
                && !tokens[1].trim().isEmpty();

        if (count == 3 && subjectAndPredicatePresent && !tokens[2].trim().isEmpty()) {
            resource(k, v, tokens[0], tokens[1], tokens[2]);
        } else if (count == 4 && subjectAndPredicatePresent && !tokens[3].trim().isEmpty()) {
            final String qualifier = tokens[2];
            if (qualifier.trim().isEmpty()) {
                literal(k, v, tokens[0], tokens[1], tokens[3]);
            } else if (qualifier.startsWith(LANG)) {
                literal_lang(k, v, tokens[0], tokens[1], tokens[3],
                        qualifier.substring(qualifier.lastIndexOf('/') + 1));
            } else if (tokens[1].equals(OBJECT_KEY)
                    || (tokens[1].equals(OBJECT_NAME) && qualifier.startsWith(GUID))) {
                literal2(k, v, tokens[0], tokens[1], qualifier, tokens[3]);
            } else {
                log.warn("Unexpected data, ignoring: {}", value);
            }
        } else if (count < 3) {
            log.warn("Line with only {} tokens: {}", count, value.toString());
        } else {
            log.warn("Line with one or more empty tokens: {}", value.toString());
        }
    }

    emit(context, k, v);
}

From source file:com.kasabi.labs.freebase.mr.Freebase2RDFReducer.java

License:Apache License

/**
 * Writes one output record per value: the key's bytes immediately followed by the value's
 * bytes, paired with {@code nullWritable}.
 *
 * <p>The value's backing bytes are appended directly. Going through
 * {@code value.toString().getBytes()} would re-encode the text with the JVM's default
 * charset, which corrupts non-ASCII data whenever that charset is not UTF-8.
 *
 * @param key     key whose bytes prefix every output record
 * @param values  values to append to the key, one output record each
 * @param context task context that receives the concatenated records
 */
@Override
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    for (Text value : values) {
        log.debug("< ({}, {})", key, value);
        k.clear();
        // getBytes() exposes the backing array, which may be longer than the content;
        // getLength() bounds the valid region.
        k.append(key.getBytes(), 0, key.getLength());
        k.append(value.getBytes(), 0, value.getLength());
        context.write(k, nullWritable);
        log.debug("> ({}, {})", k, nullWritable);
    }
}

From source file:com.kit.udf.UDFDateFormat.java

License:Apache License

/**
 * Re-formats a date string using the given output pattern.
 *
 * <p>The input is parsed with {@code standardFormatter} and rendered with
 * {@code formatter}. The output pattern is only re-applied when it differs from the
 * pattern remembered in {@code lastPatternText}.
 *
 * @param dateText    date to convert
 * @param patternText output pattern
 * @return the shared {@code result} holding the formatted date, or {@code null} when
 *         either argument is {@code null}, the pattern cannot be applied, or the date
 *         cannot be parsed
 */
public Text evaluate(Text dateText, Text patternText) {
    if (dateText == null || patternText == null) {
        return null;
    }

    // Switch the output formatter only when the caller changed the pattern.
    try {
        boolean samePattern = patternText.equals(lastPatternText);
        if (!samePattern) {
            formatter.applyPattern(patternText.toString());
            lastPatternText.set(patternText);
        }
    } catch (Exception e) {
        return null;
    }

    try {
        Date parsed = standardFormatter.parse(dateText.toString());
        result.set(formatter.format(parsed));
    } catch (ParseException e) {
        return null;
    }
    return result;
}

From source file:com.kit.udf.UDFSubstrForOracle.java

License:Apache License

/**
 * Returns a substring of {@code t} selected by a 1-based position and a length.
 *
 * <p>A positive {@code pos} counts from the start of the string (1 is the first
 * character), a negative {@code pos} counts from the end, and 0 is treated as the start.
 * When fewer than {@code len} characters remain, the substring runs to the end of the
 * string.
 *
 * @param t   source text
 * @param pos 1-based start position; negative values count from the end
 * @param len maximum number of characters to return
 * @return the shared {@code r} holding the substring, or {@code null} when any argument
 *         is {@code null}, {@code len} is not positive, or {@code |pos|} exceeds the
 *         string length
 */
public Text evaluate(Text t, IntWritable pos, IntWritable len) {

    if ((t == null) || (pos == null) || (len == null)) {
        return null;
    }

    r.clear();
    final int length = len.get();
    if (length <= 0) {
        //return r;
        return null;
    }

    final String s = t.toString();
    final int position = pos.get();
    // Widen before abs(): Math.abs(Integer.MIN_VALUE) is still negative and would slip
    // past this guard, producing a negative start index below.
    if (Math.abs((long) position) > s.length()) {
        //return r;
        return null;
    }

    final int start;
    if (position > 0) {
        start = position - 1;
    } else if (position < 0) {
        start = s.length() + position;
    } else {
        start = 0;
    }

    // Compare the remaining length rather than computing start + length, which could overflow.
    final int end;
    if ((s.length() - start) < length) {
        end = s.length();
    } else {
        end = start + length;
    }

    r.set(s.substring(start, end));
    return r;
}

From source file:com.knewton.mrtool.io.JsonRecordReader.java

License:Apache License

/**
 * {@inheritDoc}
 *
 * <p>Reads the next line while the file position has not passed {@code end}, runs it
 * through every decorator and deserializes it with Gson into the current value. Returns
 * {@code false}, and resets key and value to {@code null}, when nothing was read or the
 * resulting line is empty.
 */
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    // The key is nulled once the reader is exhausted; re-create it so that a further call
    // does not fail with a null pointer exception.
    if (key == null) {
        key = new LongWritable();
    }
    key.set(pos);

    Text jsonText = new Text();
    int consumed = 0;
    if (getFilePosition() <= end) {
        consumed = in.readLine(jsonText);
        if (consumed > 0 && !jsonText.toString().isEmpty()) {
            for (ObjectDecorator<String> decorator : decorators) {
                jsonText = new Text(decorator.decorateObject(jsonText.toString()));
            }
            String json = jsonText.toString();
            // Scoped to the local so the warning is not suppressed for the whole method.
            @SuppressWarnings("unchecked")
            V decoded = (V) gson.fromJson(json, getDataClass(json));
            value = decoded;
        }
        pos += consumed;
    }

    boolean hasRecord = consumed != 0 && !jsonText.toString().isEmpty();
    if (!hasRecord) {
        key = null;
        value = null;
    }
    return hasRecord;
}

From source file:com.knewton.mrtool.io.JsonRecordReaderTest.java

License:Apache License

/**
 * Verifies that the record reader's line reader returns the first two records from the
 * beginning of an input stream.
 *
 * @throws IOException
 * @throws InterruptedException
 */
@Test
public void testJsonRecordReader() throws IOException, InterruptedException {
    JsonRecordReader<Text> reader = new JsonRecordReader<Text>() {
        @Override
        protected Class<?> getDataClass(String jsonStr) {
            return Text.class;
        }
    };

    Configuration conf = new Configuration();
    TaskAttemptContext context = new TaskAttemptContext(conf, new TaskAttemptID());
    FileSplit fileSplit = new FileSplit(new Path("recs.2013-03-20_02_52.log"), 0, recommendationBytes.length,
            new String[0]);

    // Serve the in-memory recommendation bytes whenever a file is opened.
    new MockUp<FileSystem>() {
        @Mock
        public FSDataInputStream open(Path f) throws IOException {
            return new FSDataInputStream(new SeekableByteArrayInputStream(recommendationBytes));
        }
    };

    // Initialize it to get the compression codecs, then close the line reader and reopen it.
    reader.initialize(fileSplit, context);
    reader.close();
    LineReader lineReader = reader.initLineReader(fileSplit, conf);

    for (int i = 0; i < 2; i++) {
        Text line = new Text();
        lineReader.readLine(line);
        assertEquals(DummyJsonRecommendations.jsonRecommendations[i], line.toString());
    }
    lineReader.close();
}