List of usage examples for org.apache.hadoop.io.Text#set

public void set(String string)
public void set(Text other)
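Before the collected examples, here is a minimal standalone sketch of the two overloads. The class name and sample strings are illustrative only and do not come from any of the source files below:

import org.apache.hadoop.io.Text;

public class TextSetSketch {
    public static void main(String[] args) {
        Text buffer = new Text();

        // set(String string): replaces the contents with the UTF-8 encoding of the string
        buffer.set("first value");

        // set(Text other): copies the bytes of another Text into this instance,
        // letting the same buffer object be reused instead of allocating a new one
        Text other = new Text("second value");
        buffer.set(other);

        System.out.println(buffer); // prints "second value"
    }
}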
From source file:accumulo.ingest.AccumuloLiveCsv.java
License:Apache License
protected void writeRecord(String[] header, String[] record, Text rowId, Text fileName)
        throws AccumuloException, AccumuloSecurityException {
    Preconditions.checkArgument(header.length >= record.length,
            "Cannot have more columns in record (%s) than defined in header (%s)",
            record.length, header.length);

    final BatchWriter recordBw, schemaBw;
    try {
        recordBw = mtbw.getBatchWriter(recordTableName);
        schemaBw = mtbw.getBatchWriter(schemaTableName);
    } catch (TableNotFoundException e) {
        log.error("Table(s) ({}, {}) were deleted", recordTableName, schemaTableName, e);
        throw new RuntimeException(e);
    }

    // Some temp Texts to avoid lots of object allocations
    final Text cfHolder = new Text();
    final HashMap<String, Long> counts = new HashMap<String, Long>();

    // Write the record
    Mutation recordMutation = new Mutation(rowId);
    for (int i = 0; i < record.length; i++) {
        final String columnName = header[i];
        final String columnValue = record[i];

        if (counts.containsKey(columnName)) {
            counts.put(columnName, counts.get(columnName) + 1);
        } else {
            counts.put(columnName, 1L);
        }

        // Reuse the same Text for the column family instead of allocating one per column
        cfHolder.set(columnName);
        recordMutation.put(cfHolder, EMPTY_TEXT, new Value(columnValue.getBytes()));
    }

    recordBw.addMutation(recordMutation);

    // Update column frequencies in the schema table
    for (Entry<String, Long> schemaUpdate : counts.entrySet()) {
        Mutation schemaMutation = new Mutation(schemaUpdate.getKey());
        schemaMutation.put(SCHEMA_COLUMN_FREQ, fileName, longToValue(schemaUpdate.getValue()));
        schemaBw.addMutation(schemaMutation);
    }
}
From source file:be.uantwerpen.adrem.disteclat.ItemReaderReducer.java
License:Apache License
/**
 * Writes the singletons distribution to file OSingletonsDistribution. The distribution is
 * obtained using Round-Robin allocation.
 *
 * @param sortedSingletons
 *          the sorted list of singletons
 * @throws IOException
 * @throws InterruptedException
 */
private void writeSingletonsDistribution(List<Integer> sortedSingletons)
        throws IOException, InterruptedException {
    int end = Math.min(numberOfMappers, sortedSingletons.size());

    Text mapperId = new Text();
    Text assignedItems = new Text();

    // Round robin assignment
    for (int ix = 0; ix < end; ix++) {
        StringBuilder sb = new StringBuilder();
        for (int ix1 = ix; ix1 < sortedSingletons.size(); ix1 += numberOfMappers) {
            sb.append(sortedSingletons.get(ix1)).append(" ");
        }

        mapperId.set("" + ix);
        assignedItems.set(sb.substring(0, sb.length() - 1));
        mos.write(OSingletonsDistribution, mapperId, assignedItems);
    }
}
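As a worked example of the allocation above: with seven sorted singletons and three mappers, the loop assigns items 0, 3, 6 to mapper 0, items 1, 4 to mapper 1, and items 2, 5 to mapper 2.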
From source file:boa.datagen.SeqSortMerge.java
License:Apache License
private static void read(Reader reader, Text key, BytesWritable val) {
    try {
        if (reader.next(key, val))
            return;
    } catch (Throwable t) {
        t.printStackTrace();
    }
    // On EOF or error, clear the key so callers can detect that this reader is exhausted
    key.set("");
}
From source file:cascading.scheme.hadoop.TextDelimited.java
License:Open Source License
protected void writeHeader(SinkCall<Object[], OutputCollector> sinkCall) throws IOException {
    Fields fields = sinkCall.getOutgoingEntry().getFields();

    Text text = (Text) sinkCall.getContext()[0];
    StringBuilder line = (StringBuilder) sinkCall.getContext()[1];
    Charset charset = (Charset) sinkCall.getContext()[2];

    line = (StringBuilder) delimitedParser.joinFirstLine(fields, line);

    text.set(line.toString().getBytes(charset));
    sinkCall.getOutput().collect(null, text);

    line.setLength(0);
}
From source file:cascading.scheme.hadoop.TextDelimited.java
License:Open Source License
@Override
public void sink(FlowProcess<? extends Configuration> flowProcess,
        SinkCall<Object[], OutputCollector> sinkCall) throws IOException {
    TupleEntry tupleEntry = sinkCall.getOutgoingEntry();

    Text text = (Text) sinkCall.getContext()[0];
    StringBuilder line = (StringBuilder) sinkCall.getContext()[1];
    Charset charset = (Charset) sinkCall.getContext()[2];

    Iterable<String> strings = tupleEntry.asIterableOf(String.class);

    line = (StringBuilder) delimitedParser.joinLine(strings, line);

    text.set(line.toString().getBytes(charset));
    sinkCall.getOutput().collect(null, text);

    line.setLength(0);
}
From source file:cascading.scheme.hadoop.TextLine.java
License:Open Source License
@Override
public void sink(FlowProcess<? extends Configuration> flowProcess,
        SinkCall<Object[], OutputCollector> sinkCall) throws IOException {
    Text text = (Text) sinkCall.getContext()[0];
    Charset charset = (Charset) sinkCall.getContext()[1];
    String line = sinkCall.getOutgoingEntry().getTuple().toString();

    text.set(line.getBytes(charset));

    // it's ok to use NULL here so the collector does not write anything
    sinkCall.getOutput().collect(null, text);
}
From source file:cn.com.diditaxi.hive.cf.UDFSysDate.java
License:Apache License
public Text evaluate(Text format) {
    // Fall back to a default pattern; the original called format.set(...) here,
    // which would throw a NullPointerException when format is null
    if (format == null) {
        format = new Text("yyyy-MM-dd HH:mm:ss");
    }

    Date date = new Date();
    formatter.applyPattern(format.toString());
    result.set(formatter.format(date));
    return result;
}
From source file:cn.com.diditaxi.hive.cf.UDFSysDate.java
License:Apache License
public Text evaluate(Text format, IntWritable days) {
    // Fall back to a default pattern; the original called format.set(...) here,
    // which would throw a NullPointerException when format is null
    if (format == null) {
        format = new Text("yyyy-MM-dd HH:mm:ss");
    }

    formatter.applyPattern(format.toString());

    Date date = new Date();
    calendar.setTime(date);
    calendar.add(Calendar.DAY_OF_MONTH, days.get());
    Date newDate = calendar.getTime();

    result.set(formatter.format(newDate));
    return result;
}
From source file:cn.com.warlock.SequenceFilesTest.java
License:Apache License
public static void main(String[] args) throws IOException {
    String hdfsUri = "hdfs://hlg-2p238-fandongsheng:8020";
    String pathStr = "/tmp/example/seq1";
    String compressType = "1";

    // On Windows, hadoop.home.dir may need to point at a local Hadoop installation
    // System.setProperty("hadoop.home.dir", "E:\\tools");

    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", hdfsUri);

    Path path = new Path(pathStr);

    IntWritable key = new IntWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        SequenceFile.Writer.Option pathOpt = SequenceFile.Writer.file(path);
        SequenceFile.Writer.Option keyClassOpt = SequenceFile.Writer.keyClass(key.getClass());
        SequenceFile.Writer.Option valueClassOpt = SequenceFile.Writer.valueClass(value.getClass());
        SequenceFile.Writer.Option compressionOpt = null;

        // compression type
        if (compressType.equals("1")) {
            System.out.println("compress none");
            compressionOpt = SequenceFile.Writer.compression(CompressionType.NONE);
        } else if (compressType.equals("2")) {
            System.out.println("compress record");
            compressionOpt = SequenceFile.Writer.compression(CompressionType.RECORD);
        } else if (compressType.equals("3")) {
            System.out.println("compress block");
            compressionOpt = SequenceFile.Writer.compression(CompressionType.BLOCK);
        } else {
            System.out.println("Default : compress none");
            compressionOpt = SequenceFile.Writer.compression(CompressionType.NONE);
        }

        writer = SequenceFile.createWriter(conf, pathOpt, keyClassOpt, valueClassOpt, compressionOpt);

        for (int i = 0; i < 100; i++) {
            // Reuse the same writable instances for every appended record
            key.set(100 - i);
            value.set(DATA[i % DATA.length]);
            System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}
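For context, a read-back sketch follows. It is not part of the original example; the class name and path are placeholders. SequenceFile.Reader.next(key, value) refills the supplied writables in place (Text.set semantics), mirroring the reuse pattern in the writer loop above:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileReadSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        SequenceFile.Reader reader = null;
        try {
            // Placeholder path; point this at whatever file the writer example produced
            reader = new SequenceFile.Reader(conf,
                    SequenceFile.Reader.file(new Path("/tmp/example/seq1")));
            IntWritable key = new IntWritable();
            Text value = new Text();
            // next(key, value) refills the same writable instances on every call
            while (reader.next(key, value)) {
                System.out.printf("%s\t%s%n", key, value);
            }
        } finally {
            IOUtils.closeStream(reader);
        }
    }
}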
From source file:cn.lhfei.hadoop.ch04.MapFileWriteDemo.java
License:Apache License
public static void main(String[] args) {
    String uri = args[0];
    Configuration conf = new Configuration();
    FileSystem fs = null;

    IntWritable key = new IntWritable();
    Text value = new Text();
    MapFile.Writer writer = null;
    try {
        fs = FileSystem.get(URI.create(uri), conf);

        // Older constructor form, kept for reference:
        // writer = new MapFile.Writer(conf, fs, uri, key.getClass(), value.getClass());
        writer = new MapFile.Writer(conf, new Path(uri),
                Writer.keyClass(key.getClass()),
                Writer.valueClass(value.getClass()));

        for (int i = 0; i < 1024; i++) {
            key.set(i + 1);
            value.set(DATA[i % DATA.length]);
            writer.append(key, value);
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(writer);
    }
}
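A lookup sketch follows. It is not drawn from MapFileWriteDemo; the class name and the key value 42 are placeholders. MapFile.Reader.get(key, value) fills the supplied Text in place, so one instance can be reused across lookups:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Text;

public class MapFileReadSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        MapFile.Reader reader = null;
        try {
            // args[0] is the MapFile directory written by the demo above
            reader = new MapFile.Reader(new Path(args[0]), conf);
            IntWritable key = new IntWritable(42); // placeholder key
            Text value = new Text();
            // get() returns null when the key is absent; otherwise it fills 'value' in place
            if (reader.get(key, value) != null) {
                System.out.println(key + "\t" + value);
            }
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }
}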