Example usage for org.apache.hadoop.io.Text.set

Introduction

On this page you can find example usages of the org.apache.hadoop.io.Text.set method.

Prototype

public void set(Text other) 

Source Link

Document

Copies the contents of another Text into this instance.
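Before the real-world examples below, here is a minimal, self-contained sketch (class name and values are illustrative) showing that set(Text other) copies the bytes of the other Text into this instance, so later changes to the source do not affect the copy:

import org.apache.hadoop.io.Text;

public class TextSetExample {
    public static void main(String[] args) {
        Text source = new Text("hello hadoop");
        Text target = new Text();

        // copy the bytes of source into target
        target.set(source);
        System.out.println(target);             // hello hadoop
        System.out.println(target.getLength()); // 12

        // the copy is independent of the original
        source.set("changed");
        System.out.println(target);             // still prints: hello hadoop
    }
}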

Usage

From source file: accumulo.ingest.AccumuloLiveCsv.java

License: Apache License

protected void writeRecord(String[] header, String[] record, Text rowId, Text fileName)
        throws AccumuloException, AccumuloSecurityException {
    Preconditions.checkArgument(header.length >= record.length,
            "Cannot have more columns in record (%s) than defined in header (%s)", record.length,
            header.length);

    final BatchWriter recordBw, schemaBw;
    try {
        recordBw = mtbw.getBatchWriter(recordTableName);
        schemaBw = mtbw.getBatchWriter(schemaTableName);
    } catch (TableNotFoundException e) {
        log.error("Table(s) ({}, {}) were deleted", recordTableName, schemaTableName, e);
        throw new RuntimeException(e);
    }

    // Some temp Texts to avoid lots of object allocations
    final Text cfHolder = new Text();
    final HashMap<String, Long> counts = new HashMap<String, Long>();

    // write records
    Mutation recordMutation = new Mutation(rowId);
    for (int i = 0; i < record.length; i++) {
        final String columnName = header[i];
        final String columnValue = record[i];

        if (counts.containsKey(columnName)) {
            counts.put(columnName, counts.get(columnName) + 1);
        } else {
            counts.put(columnName, 1L);
        }

        cfHolder.set(columnName);

        recordMutation.put(cfHolder, EMPTY_TEXT, new Value(columnValue.getBytes()));
    }

    recordBw.addMutation(recordMutation);

    // update counts in schema
    for (Entry<String, Long> schemaUpdate : counts.entrySet()) {
        Mutation schemaMutation = new Mutation(schemaUpdate.getKey());

        schemaMutation.put(SCHEMA_COLUMN_FREQ, fileName, longToValue(schemaUpdate.getValue()));
        schemaBw.addMutation(schemaMutation);
    }
}

From source file: be.uantwerpen.adrem.disteclat.ItemReaderReducer.java

License: Apache License

/**
 * Writes the singletons distribution to file OSingletonsDistribution. The distribution is obtained using Round-Robin
 * allocation.
 * 
 * @param sortedSingletons
 *          the sorted list of singletons
 * @throws IOException
 * @throws InterruptedException
 */
private void writeSingletonsDistribution(List<Integer> sortedSingletons)
        throws IOException, InterruptedException {
    int end = Math.min(numberOfMappers, sortedSingletons.size());

    Text mapperId = new Text();
    Text assignedItems = new Text();

    // Round robin assignment
    for (int ix = 0; ix < end; ix++) {
        StringBuilder sb = new StringBuilder();
        for (int ix1 = ix; ix1 < sortedSingletons.size(); ix1 += numberOfMappers) {
            sb.append(sortedSingletons.get(ix1)).append(" ");
        }

        mapperId.set("" + ix);
        assignedItems.set(sb.substring(0, sb.length() - 1));
        mos.write(OSingletonsDistribution, mapperId, assignedItems);
    }
}

From source file: boa.datagen.SeqSortMerge.java

License: Apache License

private static void read(Reader reader, Text key, BytesWritable val) {
    try {
        if (reader.next(key, val))
            return;
    } catch (Throwable t) {
        t.printStackTrace();
    }
    // no more records (or the read failed): clear the key to mark this reader as exhausted
    key.set("");
}

From source file: cascading.scheme.hadoop.TextDelimited.java

License: Open Source License

protected void writeHeader(SinkCall<Object[], OutputCollector> sinkCall) throws IOException {
    Fields fields = sinkCall.getOutgoingEntry().getFields();

    Text text = (Text) sinkCall.getContext()[0];
    StringBuilder line = (StringBuilder) sinkCall.getContext()[1];
    Charset charset = (Charset) sinkCall.getContext()[2];

    line = (StringBuilder) delimitedParser.joinFirstLine(fields, line);

    text.set(line.toString().getBytes(charset));

    sinkCall.getOutput().collect(null, text);

    line.setLength(0);
}

From source file: cascading.scheme.hadoop.TextDelimited.java

License: Open Source License

@Override
public void sink(FlowProcess<? extends Configuration> flowProcess, SinkCall<Object[], OutputCollector> sinkCall)
        throws IOException {
    TupleEntry tupleEntry = sinkCall.getOutgoingEntry();

    Text text = (Text) sinkCall.getContext()[0];
    StringBuilder line = (StringBuilder) sinkCall.getContext()[1];
    Charset charset = (Charset) sinkCall.getContext()[2];

    Iterable<String> strings = tupleEntry.asIterableOf(String.class);

    line = (StringBuilder) delimitedParser.joinLine(strings, line);

    text.set(line.toString().getBytes(charset));

    sinkCall.getOutput().collect(null, text);

    line.setLength(0);
}

From source file: cascading.scheme.hadoop.TextLine.java

License: Open Source License

@Override
public void sink(FlowProcess<? extends Configuration> flowProcess, SinkCall<Object[], OutputCollector> sinkCall)
        throws IOException {
    Text text = (Text) sinkCall.getContext()[0];
    Charset charset = (Charset) sinkCall.getContext()[1];
    String line = sinkCall.getOutgoingEntry().getTuple().toString();

    text.set(line.getBytes(charset));

    // it's ok to use NULL here so the collector does not write anything
    sinkCall.getOutput().collect(null, text);
}

From source file: cn.com.diditaxi.hive.cf.UDFSysDate.java

License: Apache License

public Text evaluate(Text format) {
    // guard against a null format by falling back to a default pattern
    String pattern = (format == null) ? "yyyy-MM-dd HH:mm:ss" : format.toString();

    Date date = new Date();
    formatter.applyPattern(pattern);
    result.set(formatter.format(date));
    return result;
}

From source file: cn.com.diditaxi.hive.cf.UDFSysDate.java

License: Apache License

public Text evaluate(Text format, IntWritable days) {
    // guard against a null format by falling back to a default pattern
    String pattern = (format == null) ? "yyyy-MM-dd HH:mm:ss" : format.toString();

    formatter.applyPattern(pattern);
    Date date = new Date();
    calendar.setTime(date);
    calendar.add(Calendar.DAY_OF_MONTH, days.get());
    Date newDate = calendar.getTime();
    result.set(formatter.format(newDate));
    return result;
}

From source file: cn.com.warlock.SequenceFilesTest.java

License: Apache License

public static void main(String[] args) throws IOException {
    String hdfsUri = "hdfs://hlg-2p238-fandongsheng:8020";
    String pathStr = "/tmp/example/seq1";
    String compressType = "1";

    // On Windows, hadoop.home.dir may need to be set explicitly, e.g.:
    // System.setProperty("hadoop.home.dir", "E:\\tools");

    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", hdfsUri);
    Path path = new Path(pathStr);

    IntWritable key = new IntWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        SequenceFile.Writer.Option pathOpt = SequenceFile.Writer.file(path);
        SequenceFile.Writer.Option keyClassOpt = SequenceFile.Writer.keyClass(key.getClass());
        SequenceFile.Writer.Option valueClassOpt = SequenceFile.Writer.valueClass(value.getClass());
        SequenceFile.Writer.Option compressionOpt = null;

        // compress type
        if (compressType.equals("1")) {
            System.out.println("compress none");
            compressionOpt = SequenceFile.Writer.compression(CompressionType.NONE);
        } else if (compressType.equals("2")) {
            System.out.println("compress record");
            compressionOpt = SequenceFile.Writer.compression(CompressionType.RECORD);
        } else if (compressType.equals("3")) {
            System.out.println("compress block");
            compressionOpt = SequenceFile.Writer.compression(CompressionType.BLOCK);
        } else {
            System.out.println("Default : compress none");
            compressionOpt = SequenceFile.Writer.compression(CompressionType.NONE);
        }

        writer = SequenceFile.createWriter(conf, pathOpt, keyClassOpt, valueClassOpt, compressionOpt);

        for (int i = 0; i < 100; i++) {
            key.set(100 - i);
            value.set(DATA[i % DATA.length]);
            System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
            writer.append(key, value);

        }
    } finally {
        IOUtils.closeStream(writer);
    }
}

From source file: cn.lhfei.hadoop.ch04.MapFileWriteDemo.java

License: Apache License

public static void main(String[] args) {
    String uri = args[0];
    Configuration conf = new Configuration();
    FileSystem fs = null;

    IntWritable key = new IntWritable();
    Text value = new Text();
    MapFile.Writer writer = null;
    try {
        fs = FileSystem.get(URI.create(uri), conf);
        /*writer = new MapFile.Writer(conf, fs, uri, key.getClass(),
              value.getClass());*/

        writer = new MapFile.Writer(conf, new Path(uri), Writer.keyClass(key.getClass()),
                Writer.valueClass(value.getClass()));

        for (int i = 0; i < 1024; i++) {
            key.set(i + 1);
            value.set(DATA[i % DATA.length]);
            writer.append(key, value);
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(writer);
    }
}