List of usage examples for org.apache.hadoop.io Text toString
@Override
public String toString()
From source file:com.justgiving.raven.kissmetrics.jsonenricher.KissmetricsJsonToEnrichedJsonMapper.java
License:Open Source License
@Override public void map(LongWritable rowKey, Text rawJsonRow, Context context) throws IOException, InterruptedException { final Logger logger = Logger.getLogger(KissmetricsJsonToEnrichedJsonMapper.class); String fileNameInputToMapper = ""; String filePath = ""; try {//from w ww. j a va2s.c o m fileNameInputToMapper = ((FileSplit) context.getInputSplit()).getPath().getName(); filePath = ((FileSplit) context.getInputSplit()).getPath().toString(); } catch (Exception e) { logger.info("unable to get file inputpath"); System.out.println("unable to get file inputpath"); } //TODO: String capturedDate = getCurrentDate(); KeyRowWrapper keyRow = KissmetricsRowParser.parseJsonRowToValidJson(rawJsonRow, fileNameInputToMapper, filePath); if (keyRow != null) { if (keyRow.getKey() != null && keyRow.getJsonrow() != null && keyRow.getReturnCode() == TRACKING_COUNTER.VALID_JSON_ROW) { context.getCounter(TRACKING_COUNTER.VALID_JSON_ROW).increment(1); //TODO: Monitoring use of Octal Decoder if (keyRow.getMonitoringCode() == TRACKING_COUNTER.OCTAL_PARSING_NEEDED) { context.getCounter(TRACKING_COUNTER.OCTAL_PARSING_NEEDED).increment(1); } context.write(new Text(keyRow.getKey()), new Text(keyRow.getJsonrow())); } else if (keyRow.getMonitoringCode() == TRACKING_COUNTER.INVALID_DATE) { logger.info("Error on row parsing, skipping row"); System.out.println("Error on row parsing"); System.out.println("Skipped row with contents: " + rawJsonRow.toString()); System.out.println("--------------------------------"); context.getCounter(TRACKING_COUNTER.INVALID_JSON_ROW).increment(1); context.getCounter(TRACKING_COUNTER.INVALID_DATE).increment(1); } else { // Assume it's an invalid logger.info("Error on row parsing, skipping row"); System.out.println("Error on row parsing"); System.out.println("Skipped row with contents: " + rawJsonRow.toString()); System.out.println("--------------------------------"); context.getCounter(TRACKING_COUNTER.INVALID_JSON_ROW).increment(1); } } else { logger.info("Error 
on row parsing, skipping row"); System.out.println("Error on row parsing"); System.out.println("Skipped row with contents: " + rawJsonRow.toString()); System.out.println("--------------------------------"); context.getCounter(TRACKING_COUNTER.INVALID_JSON_ROW).increment(1); } }
From source file:com.justgiving.raven.kissmetrics.schema.KissmetricsJsonToSchemaMapper.java
License:Open Source License
@Override public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String s = value.toString(); JSONParser jsonParser = new JSONParser(); try {//from w w w. j a va2 s . co m JSONObject jsonObject = (JSONObject) jsonParser.parse(s); Set<String> keyset = jsonObject.keySet(); String jsonValue = ""; for (String jsonkey : keyset) { jsonValue = (String) jsonObject.get(jsonkey).toString(); if (jsonValue == null || jsonValue == "") { jsonValue = ""; } String lenValue = String.valueOf(jsonValue.length()); if (lenValue == null || lenValue == "") { lenValue = "0"; } context.write(new Text(jsonkey), new Text("1\t" + lenValue)); } } catch (ParseException e) { e.printStackTrace(); } }
From source file:com.justgiving.raven.kissmetrics.schema.KissmetricsJsonToSchemaReducer.java
License:Open Source License
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { int total = 0; int maxLen = 0; int currentLen = 0; List<String> valueList = null; for (Text value : values) { valueList = Arrays.asList(value.toString().split("\t")); total += Integer.valueOf(valueList.get(0)); if (valueList.size() > 1) { currentLen = Integer.valueOf(valueList.get(1)); if (maxLen < currentLen) { maxLen = currentLen;// w ww . ja va 2s . c om } } } context.write(key, new Text(String.valueOf(total) + "\t" + String.valueOf(maxLen))); }
From source file:com.justgiving.raven.kissmetrics.utils.KissmetricsRowParser.java
License:Open Source License
/*** * Used to parse, escape and enrich Kissmetircs Json records * /*from www . j a v a 2 s . c o m*/ * @param rawJsonRow * @param fileNameInputToMapper * @return */ public static KeyRowWrapper parseJsonRowToValidJson(Text rawJsonRow, String fileNameInputToMapper, String filePath) { String jsonString = ""; boolean wasOctalParsingNeeded = false; try { System.setProperty("file.encoding", "UTF-8"); s = rawJsonRow.toString(); Charset charSet = Charset.forName("UTF-8"); byte[] encoded = s.getBytes(charSet); decodedStrRaw = new String(encoded, charSet); // Test new Apache Lang3 // decodedStr = StringEscapeUtils.unescapeJava(decodedStr); //Replace any remaining Octal encoded Characters decodedStrParsed = replaceOctalUft8Char(decodedStrRaw); if (decodedStrParsed.compareTo(decodedStrRaw) == 0) { wasOctalParsingNeeded = false; } else { wasOctalParsingNeeded = true; } if (decodedStrParsed != null && decodedStrParsed != "") { JSONObject jsonObject = (JSONObject) jsonParser.parse(decodedStrParsed); // get email and user_id if (jsonObject.get("_p2") != null) { p2 = jsonObject.get("_p2").toString().toLowerCase(); if (p2.contains("@")) { jsonObject.put("user_email", p2); jsonObject.put("user_email_back", p2); } else if (p2 != null && p2.length() > 0) { jsonObject.put("user_km_id", p2); } } // get email and user_id if (jsonObject.get("_p") != null) { p = jsonObject.get("_p").toString().toLowerCase(); if (p.contains("@")) { jsonObject.put("user_email", p); jsonObject.put("user_email_back", p); } else if (p != null && p.length() > 0) { jsonObject.put("user_km_id", p); } } // Add Event if (jsonObject.get("_n") != null) { event = jsonObject.get("_n").toString(); if (event != null) { jsonObject.put("event", event); } } // add unix timestamp and datetime long currentDateTime = System.currentTimeMillis(); Date currentDate = new Date(currentDateTime); if (jsonObject.get("_t") == null) { return (new KeyRowWrapper(jsonString, null, TRACKING_COUNTER.INVALID_JSON_ROW, 
TRACKING_COUNTER.INVALID_DATE)); } long kmTimeDateMilliSeconds; long kmTimeDateMilliSecondsMobile; try { tTimestampValue = (String) jsonObject.get("_t").toString(); //See if new record with server timestamp if (jsonObject.get("_server_timestamp") != null) { serverTimestampValue = (String) jsonObject.get("_server_timestamp").toString(); } else { serverTimestampValue = "0"; } //Deal with mobile timedate cases if (jsonObject.get("_c") != null) { if (serverTimestampValue.equals("0")) { timestampValueOutput = tTimestampValue; kmTimeDateMilliSecondsMobile = 0; } else { timestampValueOutput = serverTimestampValue; mobileTimestampValueOutput = tTimestampValue; jsonObject.put("km_timestamp_mobile", mobileTimestampValueOutput); kmTimeDateMilliSecondsMobile = Long.parseLong(mobileTimestampValueOutput) * 1000; } } else {//Ignore server time //TODO Need a way to resolve mobile identify events serverTimestampValue = "0"; timestampValueOutput = tTimestampValue; kmTimeDateMilliSecondsMobile = 0; } jsonObject.put("km_timestamp", timestampValueOutput); kmTimeDateMilliSeconds = Long.parseLong(timestampValueOutput) * 1000; } catch (Exception e) { return (new KeyRowWrapper(jsonString, timestampValueOutput, TRACKING_COUNTER.INVALID_JSON_ROW, TRACKING_COUNTER.INVALID_DATE)); } Calendar calendar = Calendar.getInstance(); calendar.setTimeInMillis(kmTimeDateMilliSeconds); String event_timedate = dateFormatter.format(calendar.getTime()); jsonObject.put("event_timedate", event_timedate); if (kmTimeDateMilliSecondsMobile > 0) { calendar.setTimeInMillis(kmTimeDateMilliSecondsMobile); String event_timedate_mobile = dateFormatter.format(calendar.getTime()); jsonObject.put("event_timedate_mobile", event_timedate_mobile); } // add Map Reduce json_filename jsonObject.put("filename", fileNameInputToMapper); jsonString = jsonObject.toString(); //Add bucket path jsonObject.put("bucket", filePath); jsonString = jsonObject.toString(); // TODO add the time the record was processed by Mapper: 
//jsonObject.put("capturedDate", capturedDate); //jsonString = jsonObject.toString(); return (new KeyRowWrapper(jsonString, timestampValueOutput, TRACKING_COUNTER.VALID_JSON_ROW, wasOctalParsingNeeded ? null : TRACKING_COUNTER.OCTAL_PARSING_NEEDED)); } } catch (Exception e) { // System.err.println(e.getMessage()); // e.printStackTrace(); StringWriter errors = new StringWriter(); e.printStackTrace(new PrintWriter(errors)); logger.error(errors.toString()); logger.error("log - file " + fileNameInputToMapper); System.out.println("file " + fileNameInputToMapper); logger.error("log - row content: " + rawJsonRow.toString().replace("\t", "")); System.err.println("row content: " + rawJsonRow.toString().replace("\t", "")); System.err.println("Error skipping row"); logger.error("Log - Error skipping row"); } return null; }
From source file:com.kasabi.labs.freebase.mr.Freebase2RDFMapper.java
License:Apache License
@Override public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { log.debug("< ({}, {})", key, value); k.clear();//w ww . j a va 2 s .c o m v.clear(); String[] tokens = value.toString().split("\\t"); if (tokens.length > 0) { if ((tokens.length == 3) && (tokens[0].trim().length() > 0) && (tokens[1].trim().length() > 0) && (tokens[2].trim().length() > 0)) { resource(k, v, tokens[0], tokens[1], tokens[2]); } else if ((tokens.length == 4) && (tokens[0].trim().length() > 0) && (tokens[1].trim().length() > 0) && (tokens[3].trim().length() > 0)) { if (tokens[2].trim().length() == 0) { literal(k, v, tokens[0], tokens[1], tokens[3]); } else { if (tokens[2].startsWith(LANG)) { literal_lang(k, v, tokens[0], tokens[1], tokens[3], tokens[2].substring(tokens[2].lastIndexOf('/') + 1)); } else { if (tokens[1].equals(OBJECT_KEY)) { literal2(k, v, tokens[0], tokens[1], tokens[2], tokens[3]); } else if ((tokens[1].equals(OBJECT_NAME)) && (tokens[2].startsWith(GUID))) { literal2(k, v, tokens[0], tokens[1], tokens[2], tokens[3]); } else { log.warn("Unexpected data, ignoring: {}", value); } } } } else { if (tokens.length < 3) { log.warn("Line with only {} tokens: {}", tokens.length, value.toString()); } else { log.warn("Line with one or more empty tokens: {}", value.toString()); } } } emit(context, k, v); }
From source file:com.kasabi.labs.freebase.mr.Freebase2RDFReducer.java
License:Apache License
@Override public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { for (Text value : values) { log.debug("< ({}, {})", key, value); k.clear();//from ww w. j a v a 2s . com byte[] kb = key.getBytes(); k.append(kb, 0, key.getLength()); byte[] vb = value.toString().getBytes(); k.append(vb, 0, vb.length); context.write(k, nullWritable); log.debug("> ({}, {})", k, nullWritable); } }
From source file:com.kit.udf.UDFDateFormat.java
License:Apache License
/**
 * Parses {@code dateText} with {@code standardFormatter} and re-formats it with the pattern
 * given in {@code patternText}.
 *
 * <p>The pattern is applied to {@code formatter} only when it differs from the last pattern
 * seen; the result is written into the shared {@code result} instance.
 *
 * @param dateText    the date to convert
 * @param patternText the output pattern
 * @return the shared {@code result} holding the formatted date, or {@code null} when either
 *         argument is {@code null}, applying the pattern fails, or the date cannot be parsed
 */
public Text evaluate(Text dateText, Text patternText) {
    if (!(dateText != null && patternText != null)) {
        return null;
    }

    try {
        final boolean samePattern = patternText.equals(lastPatternText);
        if (!samePattern) {
            formatter.applyPattern(patternText.toString());
            lastPatternText.set(patternText);
        }
    } catch (Exception ex) {
        return null;
    }

    final Date parsed;
    try {
        parsed = standardFormatter.parse(dateText.toString());
    } catch (ParseException ex) {
        return null;
    }

    result.set(formatter.format(parsed));
    return result;
}
From source file:com.kit.udf.UDFSubstrForOracle.java
License:Apache License
public Text evaluate(Text t, IntWritable pos, IntWritable len) { if ((t == null) || (pos == null) || (len == null)) { return null; }/*from w ww. j av a2 s.c o m*/ r.clear(); if ((len.get() <= 0)) { //return r; return null; } String s = t.toString(); if ((Math.abs(pos.get()) > s.length())) { //return r; return null; } int start, end; if (pos.get() > 0) { start = pos.get() - 1; } else if (pos.get() < 0) { start = s.length() + pos.get(); } else { start = 0; } if ((s.length() - start) < len.get()) { end = s.length(); } else { end = start + len.get(); } r.set(s.substring(start, end)); return r; }
From source file:com.knewton.mrtool.io.JsonRecordReader.java
License:Apache License
/** * {@inheritDoc}//from w w w .j a va 2 s . c om */ @Override public boolean nextKeyValue() throws IOException, InterruptedException { // This is here in case nextKeyValue() gets called again after the record reader reached the // end of the split and doesn't have any more records to return. It avoids a null pointer // exception. if (key == null) { key = new LongWritable(); } key.set(pos); Text jsonText = new Text(); int newSize = 0; if (getFilePosition() <= end) { newSize = in.readLine(jsonText); if (newSize > 0 && !jsonText.toString().isEmpty()) { for (ObjectDecorator<String> decorator : decorators) { jsonText = new Text(decorator.decorateObject(jsonText.toString())); } // This helps with avoiding to supress warnings for the entire method. @SuppressWarnings("unchecked") V tempValue = (V) gson.fromJson(jsonText.toString(), getDataClass(jsonText.toString())); value = tempValue; } pos += newSize; } if (newSize == 0 || jsonText.toString().isEmpty()) { key = null; value = null; return false; } else { return true; } }
From source file:com.knewton.mrtool.io.JsonRecordReaderTest.java
License:Apache License
/** * Tests the line reader in the record reader to see if records can be read correctly from the * beginning of an input stream.// w w w . jav a 2 s .c om * * @throws IOException * @throws InterruptedException */ @Test public void testJsonRecordReader() throws IOException, InterruptedException { JsonRecordReader<Text> rr = new JsonRecordReader<Text>() { @Override protected Class<?> getDataClass(String jsonStr) { return Text.class; } }; Configuration conf = new Configuration(); TaskAttemptContext context = new TaskAttemptContext(conf, new TaskAttemptID()); FileSplit fileSplit = new FileSplit(new Path("recs.2013-03-20_02_52.log"), 0, recommendationBytes.length, new String[0]); new MockUp<FileSystem>() { @Mock public FSDataInputStream open(Path f) throws IOException { return new FSDataInputStream(new SeekableByteArrayInputStream(recommendationBytes)); } }; // Initialize it to get the compression codecs rr.initialize(fileSplit, context); // close the line reader and reopen it. rr.close(); LineReader lineReader = rr.initLineReader(fileSplit, conf); Text line = new Text(); lineReader.readLine(line); assertEquals(DummyJsonRecommendations.jsonRecommendations[0], line.toString()); line = new Text(); lineReader.readLine(line); assertEquals(DummyJsonRecommendations.jsonRecommendations[1], line.toString()); lineReader.close(); }