List of usage examples for org.apache.hadoop.io.Text.set
public void set(Text other)
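Before the project examples below, here is a minimal, self-contained sketch (not taken from any of the listed projects; the class name and values are illustrative) of what Text.set(Text other) does: it copies the byte contents of another Text instance into this one, after which the two objects can be mutated independently.

import org.apache.hadoop.io.Text;

public class TextSetExample {
    public static void main(String[] args) {
        Text original = new Text("hello");
        Text copy = new Text();

        // Copy the byte contents of 'original' into 'copy'.
        copy.set(original);

        // 'copy' owns its own buffer, so changing 'original' afterwards
        // does not affect it.
        original.set("goodbye");

        System.out.println(copy);     // prints "hello"
        System.out.println(original); // prints "goodbye"
    }
}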
From source file:com.blackberry.logdriver.mapred.boom.BoomRecordReader.java
License:Apache License
@SuppressWarnings("unchecked")
@Override
public boolean next(LogLineData key, Text value) throws IOException {
    while (lines.size() == 0) {
        // If we're out of lines in the current record, then get the next record -
        // unless we're out of records or past the end of where we should be.
        while (reader == null || reader.hasNext() == false || reader.pastSync(end)) {
            // If we have another file, then init that and keep going, otherwise,
            // just return false.
            currentFile++;
            if (currentFile >= split.getNumPaths()) {
                return false;
            }
            initCurrentFile();
        }

        Record record = reader.next();
        lld = new LogLineData();
        lld.setBlockNumber((Long) record.get("blockNumber"));
        lld.setCreateTime((Long) record.get("createTime"));
        second = (Long) record.get("second");
        lineNumber = 0;

        lines.addAll((List<Record>) record.get("logLines"));
    }

    Record line = lines.pollFirst();
    long ms = (Long) line.get("ms");
    String message = line.get("message").toString();
    int eventId = (Integer) line.get("eventId");
    ++lineNumber;

    key.set(lld);
    key.setLineNumber(lineNumber);
    key.setTimestamp(second * 1000 + ms);
    key.setEventId(eventId);
    value.set(message);

    pos = reader.tell();

    return true;
}
From source file:com.chimpler.example.bayes.DataToSeq.java
License:Apache License
public static void main(String args[]) throws Exception {
    if (args.length != 2) {
        System.err.println("Arguments: [input tsv file] [output sequence file]");
        return;
    }
    String inputFileName = args[0];
    String outputDirName = args[1];

    Configuration configuration = new Configuration();
    FileSystem fs = FileSystem.get(configuration);
    Writer writer = new SequenceFile.Writer(fs, configuration, new Path(outputDirName + "/chunk-0"),
            Text.class, Text.class);

    int count = 0;
    BufferedReader reader = new BufferedReader(new FileReader(inputFileName));
    Text key = new Text();
    Text value = new Text();
    while (true) {
        String line = reader.readLine();
        if (line == null) {
            break;
        }
        String[] tokens = line.split("\t", 2);
        if (tokens.length != 2) {
            System.out.println("Skip line: " + line);
            continue;
        }
        String category = tokens[0];
        // String id = tokens[1];
        String message = tokens[1];
        key.set("/" + category + "/");
        value.set(message);
        writer.append(key, value);
        count++;
    }
    reader.close();
    writer.close();
    System.out.println("Wrote " + count + " entries.");
}
From source file:com.chinamobile.bcbsp.io.titan.TitanRecordWriter.java
License:Apache License
@Override
public void write(Text keyValue) throws IOException, InterruptedException {
    Text key = new Text();
    Text value = new Text();
    StringTokenizer str1 = new StringTokenizer(keyValue.toString(), "\t");
    if (str1.hasMoreElements()) {
        key.set(str1.nextToken());
    }
    if (str1.hasMoreElements()) {
        value.set(str1.nextToken());
    }
    // Nothing to write if no key token was found.
    // (The original compared against a fresh Text with ==, which is always false.)
    if (key.getLength() == 0) {
        return;
    }
    String[] vertexInfo = key.toString().split(":");
    String vertexID = vertexInfo[0];
    String vertexValue = vertexInfo[1];
    // No adjacency list: just add or update the vertex itself.
    if (value.getLength() == 0) {
        try {
            if (!hasVertex(vertexID)) {
                client.execute("g.addVertex([vertexID:'" + vertexID + "', value:'" + vertexValue + "'])");
            } else {
                client.execute(
                        "g.V('vertexID','" + vertexID + "').sideEffect{it.value = '" + vertexValue + "'}");
            }
        } catch (RexProException e) {
            LOG.error("Can not write record to database!");
            return;
        }
        return;
    }
    String[] strs = value.toString().split(" ");
    String[] outgoingVertexIDs = new String[strs.length];
    String[] weights = new String[strs.length];
    for (int i = 0; i < strs.length; i++) {
        String[] str = strs[i].split(":");
        outgoingVertexIDs[i] = str[0];
        weights[i] = str[1];
    }
    try {
        if (!hasVertex(vertexID)) {
            client.execute("g.addVertex([vertexID:'" + vertexID + "', value:'" + vertexValue + "'])");
        } else {
            client.execute("g.V('vertexID','" + vertexID + "').sideEffect{it.value = '" + vertexValue + "'}");
        }
        for (int i = 0; i < outgoingVertexIDs.length; i++) {
            if (!hasVertex(outgoingVertexIDs[i])) {
                client.execute("g.addVertex([vertexID:'" + outgoingVertexIDs[i] + "', value:''])");
            }
            /*
             * else { client.execute("g.V('vertexID','" + outgoingVertexIDs[i] +
             * "')"); }
             */
            client.execute("g.addEdge(g.V('vertexID','" + vertexID + "').next(), g.V('vertexID','"
                    + outgoingVertexIDs[i] + "').next(), 'outgoing', [weight:" + weights[i] + "])");
        }
    } catch (RexProException e) {
        LOG.error("Can not write record to database!");
        return;
    }
}
From source file:com.cloudera.dataflow.spark.HadoopFileFormatPipelineTest.java
License:Open Source License
private void populateFile() throws IOException {
    IntWritable key = new IntWritable();
    Text value = new Text();
    try (Writer writer = SequenceFile.createWriter(new Configuration(), Writer.keyClass(IntWritable.class),
            Writer.valueClass(Text.class), Writer.file(new Path(this.inputFile.toURI())))) {
        for (int i = 0; i < 5; i++) {
            key.set(i);
            value.set("value-" + i);
            writer.append(key, value);
        }
    }
}
From source file:com.cloudera.sa.ExcelRecordReader.java
License:Apache License
private Text getCellValue(Cell cell) {
    Text out = new Text();
    CellType cellType = cell.getCellTypeEnum();
    if (cellType == CellType.STRING) {
        out.set(cell.getStringCellValue());
    } else if (cellType == CellType.NUMERIC) {
        out.set(String.valueOf(cell.getNumericCellValue()));
    } else if (cellType == CellType.FORMULA) {
        out.set(cell.getCellFormula());
    } else if (cellType == CellType.ERROR) {
        out.set(String.valueOf(cell.getErrorCellValue()));
    } else if (cellType == CellType.BOOLEAN) {
        out.set(String.valueOf(cell.getBooleanCellValue()));
    } else {
        out.set("");
    }
    return out;
}
From source file:com.cloudera.science.avro.streaming.AvroAsJSONRecordReader.java
License:Open Source License
@Override
public boolean next(Text key, Text value) throws IOException {
    if (!reader.hasNext() || reader.pastSync(end)) {
        return false;
    }
    datum = reader.next(datum);
    key.set(datum.toString());
    return true;
}
From source file:com.cmcc.hy.bigdata.weijifen.jobs.hubei.score.ScoreInfoDayMapper.java
License:Open Source License
@Override
protected void map(BytesWritable key, Text value,
        Mapper<BytesWritable, Text, Text, ScoreInfo>.Context context) throws IOException, InterruptedException {
    String converted = ConvertUtil.convertEncoding(value, "GBK");
    if (converted == null) {
        return;
    }
    String[] values = converted.split("\t");
    for (int i = 0; i < values.length; i++) {
        logger.info("the i=" + i + " value=" + values[i]);
    }
    Map<String, String> map = new HashMap<String, String>();
    for (ValueExtractor extractor : extractors) {
        String result = extractor.validateAndExtract(values);
        map.put(extractor.getMapping(), result);
    }
    String enttid = map.get(ENTT_ID);
    if (enttid == null) {
        // Skip records that have no entity id.
        return;
    }
    ScoreInfo valueout = new ScoreInfo();
    valueout.setEnttId(map.get(ENTT_ID));
    valueout.setScoreType(map.get(SCORE_TYPE));
    valueout.setScoreBalance(map.get(SCORE_BALANCE));
    valueout.setStatDate(context.getConfiguration().get(STAT_MONTH));
    valueout.setStatMonth(context.getConfiguration().get(STAT_MONTH));
    logger.info("ScoreInfo tostr:" + valueout.toString());

    Text keyout = new Text();
    keyout.set(enttid);
    /* context.write(keyout, valueout); */
}
From source file:com.datasalt.utils.commons.io.DumpTextFileAsSequenceFile.java
License:Apache License
public static void dump(String input, String output) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fS = FileSystem.get(conf);
    BufferedReader reader = new BufferedReader(new FileReader(new File(input)));
    String line = "";
    Text t1 = new Text();
    Text t2 = new Text();
    SequenceFile.Writer writer = new SequenceFile.Writer(fS, conf, new Path(output), Text.class, Text.class);
    while ((line = reader.readLine()) != null) {
        String[] fields = line.split("\t");
        t1.set(fields[0]);
        t2.set(fields[1]);
        writer.append(t1, t2);
    }
    writer.close();
    reader.close();
}
From source file:com.datascience.hadoop.CsvRecordReader.java
License:Apache License
@Override
public boolean next(LongWritable key, ListWritable<Text> value) throws IOException {
    value.clear();
    try {
        if (iterator.hasNext()) {
            CSVRecord record = iterator.next();
            position++;
            colLength = colLength == null ? record.size() : colLength;
            if ((!record.isConsistent() || record.size() != colLength) && strict) {
                String message = String.format("%s: %s", "inconsistent record at position", position);
                throw new CsvParseException(message);
            }

            key.set(record.getRecordNumber());
            for (int i = 0; i < record.size(); i++) {
                String item = record.get(i);
                if (item == null) {
                    value.add(null);
                } else {
                    Text text = cache[i];
                    if (text == null) {
                        text = new Text();
                        cache[i] = text;
                    }
                    text.set(item);
                    value.add(text);
                }
            }
            // position = record.getCharacterPosition();
            return true;
        }
    } catch (Exception e) {
        LOGGER.warn("failed to parse record at position: " + position);
        if (strict) {
            throw e;
        } else {
            return next(key, value);
        }
    }
    return false;
}
From source file:com.datatorrent.contrib.hdht.HadoopFilePerformanceTest.java
License:Open Source License
private void writeMapFile() throws Exception {
    Path path = Testfile.MAPFILE.filepath();

    Text key = new Text();
    Text value = new Text();

    long fsMinBlockSize = conf.getLong("dfs.namenode.fs-limits.min-block-size", 0);
    long testBlockSize = (blockSize < fsMinBlockSize) ? fsMinBlockSize : (long) blockSize;

    MapFile.Writer writer = new MapFile.Writer(conf, path, MapFile.Writer.keyClass(key.getClass()),
            MapFile.Writer.valueClass(value.getClass()),
            MapFile.Writer.compression(SequenceFile.CompressionType.NONE),
            SequenceFile.Writer.blockSize(testBlockSize), SequenceFile.Writer.bufferSize((int) testBlockSize));
    for (int i = 0; i < testSize; i++) {
        key.set(getKey(i));
        value.set(getValue());
        writer.append(key, value);
    }
    IOUtils.closeStream(writer);
}