List of usage examples for org.apache.hadoop.io.Text.toString()
@Override
public String toString()
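Text stores its contents as UTF-8 encoded bytes; toString() decodes those bytes back into a java.lang.String. The following minimal, self-contained sketch shows the call in isolation before the project examples below (it is not taken from any of those source files; the class name and sample string are purely illustrative):

import org.apache.hadoop.io.Text;

public class TextToStringDemo {
    public static void main(String[] args) {
        // Text holds UTF-8 bytes; toString() decodes them into a String.
        Text greeting = new Text("hello, hadoop");
        String decoded = greeting.toString();
        System.out.println(decoded);          // hello, hadoop
        // getLength() is the byte length of the UTF-8 encoding, which can
        // differ from String.length() for non-ASCII text.
        System.out.println(greeting.getLength());
    }
}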
From source file:authordetect.input.SingleBookReader.java
private boolean isBookStart(Text line) {
    String lineString = line.toString();
    return lineString.toLowerCase().contains("start") && lineString.toLowerCase().contains("gutenberg");
}
From source file:average.AverageMapper.java
@Override
public void map(LongWritable _key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    String tempString = value.toString();
    String[] array = tempString.split(",");
    String key = array[0];
    output.collect(new Text(key), new Text(array[1]));
}
From source file:average.AverageReducer.java
@Override
public void reduce(Text _key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    Text key = _key;
    if (!key.toString().equals("0Student_Id")) {
        int frequencyForYear = 0;
        double count = 0.0;
        while (values.hasNext()) {
            Text value = values.next();
            frequencyForYear += Integer.parseInt(value.toString());
            count++;
        }
        double average = frequencyForYear / count;
        output.collect(key, new Text(Double.toString(average)));
    } else {
        // For the header key "0Student_Id", emit the literal "Average" instead of a computed value.
        output.collect(key, new Text("Average"));
    }
}
From source file:averagerating_youtube.AvgRating_CommCountMapper.java
@Override
protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
    String[] fields = value.toString().split(",");
    String videoId = fields[0];
    if (!fields[6].isEmpty()) {
        this.v_rate = Float.parseFloat(fields[6]);
    } else {
        this.v_rate = 0;
    }
    video_name.set(videoId);
    outTuple.setComment_count(1);
    outTuple.setVideo_rating(this.v_rate);
    context.write(video_name, outTuple);
}
From source file:averagetemperature.AverageTemperatureMapper.java
@Override
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {
    String[] line = value.toString().split(",");
    String datePart = line[1];
    String temp = line[10];
    if (StringUtils.isNumeric(temp)) {
        try {
            output.collect(new Text(datePart), new IntWritable(Integer.parseInt(temp)));
        } catch (NumberFormatException e) {
            // Ignore values that look numeric but fail to parse.
        }
    }
}
From source file:azkaban.common.web.JsonSequenceFileViewer.java
License:Apache License
public void displaySequenceFile(SequenceFile.Reader reader, PrintWriter output, int startLine, int endLine)
        throws IOException {
    if (logger.isDebugEnabled())
        logger.debug("display json file");

    try {
        BytesWritable keyWritable = new BytesWritable();
        BytesWritable valueWritable = new BytesWritable();
        Text keySchema = reader.getMetadata().get(new Text("key.schema"));
        Text valueSchema = reader.getMetadata().get(new Text("value.schema"));

        JsonTypeSerializer keySerializer = new JsonTypeSerializer(keySchema.toString());
        JsonTypeSerializer valueSerializer = new JsonTypeSerializer(valueSchema.toString());

        // skip lines before the start line
        for (int i = 1; i < startLine; i++)
            reader.next(keyWritable, valueWritable);

        // now actually output lines
        for (int i = startLine; i <= endLine; i++) {
            boolean readSomething = reader.next(keyWritable, valueWritable);
            if (!readSomething)
                break;
            output.write(safeToString(keySerializer.toObject(keyWritable.getBytes())));
            output.write("\t=>\t");
            output.write(safeToString(valueSerializer.toObject(valueWritable.getBytes())));
            output.write("\n");
            output.flush();
        }
    } finally {
        reader.close();
    }
}
From source file:azkaban.viewer.hdfs.JsonSequenceFileViewer.java
License:Apache License
public void displaySequenceFile(AzkabanSequenceFileReader.Reader reader, PrintWriter output, int startLine,
        int endLine) throws IOException {
    if (logger.isDebugEnabled()) {
        logger.debug("display json file");
    }

    BytesWritable keyWritable = new BytesWritable();
    BytesWritable valueWritable = new BytesWritable();
    Text keySchema = reader.getMetadata().get(new Text("key.schema"));
    Text valueSchema = reader.getMetadata().get(new Text("value.schema"));

    JsonTypeSerializer keySerializer = new JsonTypeSerializer(keySchema.toString());
    JsonTypeSerializer valueSerializer = new JsonTypeSerializer(valueSchema.toString());

    // skip lines before the start line
    for (int i = 1; i < startLine; i++) {
        reader.next(keyWritable, valueWritable);
    }

    // now actually output lines
    for (int i = startLine; i <= endLine; i++) {
        boolean readSomething = reader.next(keyWritable, valueWritable);
        if (!readSomething) {
            break;
        }
        output.write(safeToString(keySerializer.toObject(keyWritable.getBytes())));
        output.write("\t=>\t");
        output.write(safeToString(valueSerializer.toObject(valueWritable.getBytes())));
        output.write("\n");
        output.flush();
    }
}
From source file:babel.prep.datedcorpus.DatedLangFilesOutputFormat.java
License:Apache License
protected String generateFileNameForKeyValue(Text key, Text ver, String name) {
    String[] toks = key.toString().split(DatedCorpusGenMapper.DATE_LANG_SEP);

    if (toks == null || toks.length != 2) {
        return REJECTED_FILE;
    } else {
        Calendar cal = Calendar.getInstance();
        cal.setTimeInMillis(Long.parseLong(toks[1]));

        int year = cal.get(Calendar.YEAR);
        int month = cal.get(Calendar.MONTH) + 1;
        int day = cal.get(Calendar.DAY_OF_MONTH);

        if (year < 2000 || year > 2011) {
            return REJECTED_FILE;
        } else {
            return toks[0] + File.separator + year + File.separator + year + "-" + month + "-" + day + EXTENSION;
        }
    }
}
From source file:babel.prep.datedcorpus.DatedLangFilesOutputFormat.java
License:Apache License
public RecordWriter<Text, Text> getBaseRecordWriter(final FileSystem fs, JobConf job, String name,
        final Progressable progress) throws IOException {
    final Path dumpFile = new Path(FileOutputFormat.getOutputPath(job), name);

    // Get the old copy out of the way
    if (fs.exists(dumpFile)) {
        fs.delete(dumpFile, true);
    } else {
        fs.mkdirs(dumpFile.getParent());
    }

    return new RecordWriter<Text, Text>() {
        public synchronized void write(Text key, Text versText) throws IOException {
            try {
                BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                        new FileOutputStream(new File(dumpFile.toUri()), true), DEFAULT_CHARSET));
                writer.write(versText.toString());
                writer.close();
            } catch (Exception e) {
                throw new RuntimeException("Error writing page versions: " + e.toString());
            }
        }

        public synchronized void close(Reporter reporter) throws IOException {
        }
    };
}
From source file:babel.prep.extract.PageExtReducer.java
License:Apache License
public void reduce(Text key, Iterator<NutchChunk> values, OutputCollector<Text, Page> output, Reporter reporter)
        throws IOException {
    // Create a new page (potentially containing multiple versions)
    Page page = new Page(key.toString(), values);
    int numVersions = page.numVersions();

    // Only care about it if we have at least one version
    if (numVersions > 0 && (page.pageURL().length() > 0)) // && isBBCEnglish(page)
    {
        NutchPageExtractor.Stats.incPages();
        NutchPageExtractor.Stats.incVersions(numVersions);
        output.collect(key, page);
    } else {
        NutchPageExtractor.Stats.incIgnoredPages();
    }
}