List of usage examples for org.apache.hadoop.io.Text#toString()
@Override
public String toString()
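Text stores its contents as UTF-8-encoded bytes, and toString() decodes those bytes back into a java.lang.String. Before the collected examples, here is a minimal standalone sketch of that behavior (the class and variable names are illustrative only and are not taken from any of the source files below):

import org.apache.hadoop.io.Text;

public class TextToStringSketch {
    public static void main(String[] args) {
        Text text = new Text("hello, hadoop");  // contents are held internally as UTF-8 bytes
        String decoded = text.toString();       // decode back into a java.lang.String
        System.out.println(decoded);            // prints: hello, hadoop

        // Text is mutable, so toString() always reflects the current contents
        text.set("new contents");
        System.out.println(text.toString());    // prints: new contents
    }
}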
From source file:com.ckelsel.hadoop.MaxTemperature.AppMapper.java
License:Open Source License
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    String line = value.toString();
}
From source file:com.cloudera.castagna.logparser.mr.StatusCodesStatsMapper.java
License:Apache License
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    log.debug("< ({}, {})", key, value);
    try {
        Map<String, String> logLine = parser.parseLine(value.toString());
        if (logLine.get(LogParser.STATUS_CODE) != null) {
            // key: the request timestamp down to the minute
            StringBuilder outKey = new StringBuilder();
            outKey.append(logLine.get(LogParser.TIME_YEAR));
            outKey.append(Constants.SEPARATOR);
            outKey.append(logLine.get(LogParser.TIME_MONTH));
            outKey.append(Constants.SEPARATOR);
            outKey.append(logLine.get(LogParser.TIME_DAY));
            outKey.append(Constants.SEPARATOR);
            outKey.append(logLine.get(LogParser.TIME_HOUR));
            outKey.append(Constants.SEPARATOR);
            outKey.append(logLine.get(LogParser.TIME_MINUTE));
            // outKey.append(Constants.SPACE);
            // outKey.append(logLine.get(LogParser.URL));

            // value: "<status_code>:1"
            StringBuilder outValue = new StringBuilder();
            outValue.append(logLine.get(LogParser.STATUS_CODE));
            outValue.append(Constants.COLON);
            outValue.append(Constants.ONE);

            outTextKey.clear();
            outTextKey.set(outKey.toString());
            outTextValue.clear();
            outTextValue.set(outValue.toString());
            context.write(outTextKey, outTextValue);
            log.debug("> ({}, {})", outTextKey, outTextValue);
        } else {
            // TODO
        }
    } catch (ParseException e) {
        log.debug("Error parsing: {} {}", key, value);
    }
}
From source file:com.cloudera.castagna.logparser.mr.StatusCodesStatsReducer.java
License:Apache License
@Override
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    // accumulate per-status-code counts from the "<status_code>:<count>" tokens in each value
    Map<String, Integer> counters = new TreeMap<String, Integer>();
    Iterator<Text> iter = values.iterator();
    while (iter.hasNext()) {
        Text value = iter.next();
        log.debug("< ({}, {})", key, value);
        Utils.increment(counters, value);
    }

    // the incoming key is either "<date>" or "<date> <url>"
    StringBuilder outValue = new StringBuilder();
    String[] ks = key.toString().split(Constants.SPACE);
    String date = ks[0];
    if (ks.length > 1) {
        String url = ks[1];
        outTextKey.clear();
        outTextKey.set(url);
        outValue.append(date);
        outValue.append(Constants.TAB);
    } else {
        outTextKey.clear();
        outTextKey.set(date);
    }

    int total = Utils.total(counters);
    outValue.append(total);
    outValue.append(Constants.TAB);

    for (String k : counters.keySet()) {
        outValue.append(k);
        outValue.append(Constants.COLON);
        outValue.append(counters.get(k));
        outValue.append(Constants.TAB);
    }

    outTextValue.clear();
    outTextValue.set(outValue.toString());
    context.write(outTextKey, outTextValue);
    log.debug("> ({}, {})", outTextKey, outTextValue);
}
From source file:com.cloudera.castagna.logparser.mr.TranscodeLogsMapper.java
License:Apache License
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    log.debug("< ({}, {})", key, value);
    try {
        Map<String, String> logLine = parser.parseLine(value.toString());

        StringBuilder outValue = new StringBuilder();
        outValue.append(logLine.get(LogParser.REMOTE_HOSTNAME));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.USERNAME));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.HTTP_METHOD));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.URL));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIME_YEAR));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIME_MONTH));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIME_DAY));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIME_HOUR));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIME_MINUTE));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIME_SECOND));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIMESTAMP));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.STATUS_CODE));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.SIZE));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.ELAPSED_TIME));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.USER_AGENT));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.REFERER));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get("JSESSIONID"));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get("SITESERVER"));
        outValue.append(Constants.TAB);

        outTextValue.clear();
        outTextValue.set(outValue.toString());
        context.write(NullWritable.get(), outTextValue);
        log.debug("> ({}, {})", NullWritable.get(), outTextValue);
    } catch (ParseException e) {
        log.debug("Error parsing: {} {}", key, value);
    }
}
From source file:com.cloudera.castagna.logparser.Utils.java
License:Apache License
public static void increment(Map<String, Integer> counters, Text value) {
    // value is a space-separated list of "<status_code>:<count>" tokens
    for (String v : value.toString().split(Constants.SPACE)) {
        String[] vs = v.split(Constants.COLON);
        String status_code = vs[0];
        int count = Integer.parseInt(vs[1]);
        if (counters.containsKey(status_code)) {
            counters.put(status_code, count + counters.get(status_code));
        } else {
            counters.put(status_code, count);
        }
    }
}
From source file:com.cloudera.cdk.morphline.hadoop.sequencefile.ReadSequenceFileTest.java
License:Apache License
/**
 * return a mapping of expected keys -> records
 */
private HashMap<String, Record> createTextSequenceFile(File file, int numRecords) throws IOException {
    HashMap<String, Record> map = new HashMap<String, Record>();
    SequenceFile.Metadata metadata = new SequenceFile.Metadata(getMetadataForSequenceFile());
    FSDataOutputStream out = new FSDataOutputStream(new FileOutputStream(file), null);
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(new Configuration(), out, Text.class, Text.class,
                SequenceFile.CompressionType.NONE, null, metadata);
        for (int i = 0; i < numRecords; ++i) {
            Text key = new Text("key" + i);
            Text value = new Text("value" + i);
            writer.append(key, value);
            Record record = new Record();
            record.put("key", key);
            record.put("value", value);
            map.put(key.toString(), record);
        }
    } finally {
        Closeables.closeQuietly(writer);
    }
    return map;
}
From source file:com.cloudera.cdk.morphline.hadoop.sequencefile.ReadSequenceFileTest.java
License:Apache License
/**
 * return a mapping of expected keys -> records
 */
private HashMap<String, Record> createMyWritableSequenceFile(File file, int numRecords) throws IOException {
    HashMap<String, Record> map = new HashMap<String, Record>();
    SequenceFile.Metadata metadata = new SequenceFile.Metadata(getMetadataForSequenceFile());
    FSDataOutputStream out = new FSDataOutputStream(new FileOutputStream(file), null);
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(new Configuration(), out, Text.class,
                ParseTextMyWritableBuilder.MyWritable.class, SequenceFile.CompressionType.NONE, null, metadata);
        for (int i = 0; i < numRecords; ++i) {
            Text key = new Text("key" + i);
            ParseTextMyWritableBuilder.MyWritable value = new ParseTextMyWritableBuilder.MyWritable("value", i);
            writer.append(key, value);
            Record record = new Record();
            record.put("key", key);
            record.put("value", value);
            map.put(key.toString(), record);
        }
    } finally {
        Closeables.closeQuietly(writer);
    }
    return map;
}
From source file:com.cloudera.dataflow.spark.HadoopFileFormatPipelineTest.java
License:Open Source License
@Test
public void testSequenceFile() throws Exception {
    populateFile();
    Pipeline p = Pipeline.create(PipelineOptionsFactory.create());
    @SuppressWarnings("unchecked")
    Class<? extends FileInputFormat<IntWritable, Text>> inputFormatClass =
            (Class<? extends FileInputFormat<IntWritable, Text>>) (Class<?>) SequenceFileInputFormat.class;
    HadoopIO.Read.Bound<IntWritable, Text> read = HadoopIO.Read.from(inputFile.getAbsolutePath(),
            inputFormatClass, IntWritable.class, Text.class);
    PCollection<KV<IntWritable, Text>> input = p.apply(read);
    @SuppressWarnings("unchecked")
    Class<? extends FileOutputFormat<IntWritable, Text>> outputFormatClass =
            (Class<? extends FileOutputFormat<IntWritable, Text>>) (Class<?>) TemplatedSequenceFileOutputFormat.class;
    @SuppressWarnings("unchecked")
    HadoopIO.Write.Bound<IntWritable, Text> write = HadoopIO.Write.to(outputFile.getAbsolutePath(),
            outputFormatClass, IntWritable.class, Text.class);
    input.apply(write.withoutSharding());
    EvaluationResult res = SparkPipelineRunner.create().run(p);
    res.close();
    IntWritable key = new IntWritable();
    Text value = new Text();
    try (Reader reader = new Reader(new Configuration(), Reader.file(new Path(outputFile.toURI())))) {
        int i = 0;
        while (reader.next(key, value)) {
            assertEquals(i, key.get());
            assertEquals("value-" + i, value.toString());
            i++;
        }
    }
}
From source file:com.cloudera.impala.TestUdf.java
License:Apache License
public Text evaluate(Text a, Text b) {
    if (a == null || b == null)
        return null;
    return new Text(a.toString() + b.toString());
}
From source file:com.cloudera.knittingboar.io.TestSplitCalcs.java
License:Apache License
/**
 * - use the TextInputFormat.getSplits() to test pulling split info
 * @throws IOException
 */
public void testGetSplits() throws IOException {
    TextInputFormat input = new TextInputFormat();
    JobConf job = new JobConf(defaultConf);
    Path file = new Path(workDir, "testGetSplits.txt");

    int tmp_file_size = 200000;

    long block_size = localFs.getDefaultBlockSize();
    System.out.println("default block size: " + (block_size / 1024 / 1024) + "MB");

    Writer writer = new OutputStreamWriter(localFs.create(file));
    try {
        for (int i = 0; i < tmp_file_size; i++) {
            writer.write(
                    "a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, 1, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, 99");
            writer.write("\n");
        }
    } finally {
        writer.close();
    }

    System.out.println("file write complete");

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;

    // localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, file);

    // try splitting the file in a variety of sizes
    TextInputFormat format = new TextInputFormat();
    format.configure(job);
    LongWritable key = new LongWritable();
    Text value = new Text();

    int numSplits = 1;
    InputSplit[] splits = format.getSplits(job, numSplits);
    LOG.info("requested " + numSplits + " splits, splitting: got = " + splits.length);
    assertEquals(2, splits.length);

    System.out.println("---- debug splits --------- ");

    for (int x = 0; x < splits.length; x++) {
        System.out.println("> Split [" + x + "]: " + splits[x].getLength() + ", " + splits[x].toString()
                + ", " + splits[x].getLocations()[0]);
        RecordReader<LongWritable, Text> reader = format.getRecordReader(splits[x], job, reporter);
        try {
            int count = 0;
            while (reader.next(key, value)) {
                if (count == 0) {
                    System.out.println("first: " + value.toString());
                    assertTrue(value.toString().contains("a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p"));
                }
                count++;
            }
            System.out.println("last: " + value.toString());
            assertTrue(value.toString().contains("a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p"));
        } finally {
            reader.close();
        }
    } // for each split
}