Example usage for org.apache.hadoop.io.Text.set

Introduction

On this page you can find example usages of the org.apache.hadoop.io.Text.set method.

Prototype

public void set(Text other) 

Source Link

Document

Copies the contents of another Text into this instance.
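Before the real-world examples below, here is a minimal, self-contained sketch (class name and values are illustrative) showing that set(Text other) copies the bytes of the other Text into this instance, so later changes to the source do not affect the copy:

import org.apache.hadoop.io.Text;

public class TextSetExample {
    public static void main(String[] args) {
        Text source = new Text("hello hadoop");
        Text target = new Text();

        // copy the bytes of source into target
        target.set(source);
        System.out.println(target);             // hello hadoop
        System.out.println(target.getLength()); // 12

        // the copy is independent of the original
        source.set("changed");
        System.out.println(target);             // still prints: hello hadoop
    }
}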

Usage

From source file: accumulo.ingest.AccumuloLiveCsv.java

License: Apache License

protected void writeRecord(String[] header, String[] record, Text rowId, Text fileName)
        throws AccumuloException, AccumuloSecurityException {
    Preconditions.checkArgument(header.length >= record.length,
            "Cannot have more columns in record (%s) than defined in header (%s)", record.length,
            header.length);

    final BatchWriter recordBw, schemaBw;
    try {
        recordBw = mtbw.getBatchWriter(recordTableName);
        schemaBw = mtbw.getBatchWriter(schemaTableName);
    } catch (TableNotFoundException e) {
        log.error("Table(s) ({}, {}) were deleted", recordTableName, schemaTableName, e);
        throw new RuntimeException(e);
    }

    // Some temp Texts to avoid lots of object allocations
    final Text cfHolder = new Text();
    final HashMap<String, Long> counts = new HashMap<String, Long>();

    // write records
    Mutation recordMutation = new Mutation(rowId);
    for (int i = 0; i < record.length; i++) {
        final String columnName = header[i];
        final String columnValue = record[i];

        if (counts.containsKey(columnName)) {
            counts.put(columnName, counts.get(columnName) + 1);
        } else {
            counts.put(columnName, 1L);
        }

        cfHolder.set(columnName);

        recordMutation.put(cfHolder, EMPTY_TEXT, new Value(columnValue.getBytes()));
    }

    recordBw.addMutation(recordMutation);

    // update counts in schema
    for (Entry<String, Long> schemaUpdate : counts.entrySet()) {
        Mutation schemaMutation = new Mutation(schemaUpdate.getKey());

        schemaMutation.put(SCHEMA_COLUMN_FREQ, fileName, longToValue(schemaUpdate.getValue()));
        schemaBw.addMutation(schemaMutation);
    }
}

From source file: be.uantwerpen.adrem.disteclat.ItemReaderReducer.java

License: Apache License

/**
 * Writes the singletons distribution to file OSingletonsDistribution. The distribution is obtained using Round-Robin
 * allocation.
 * 
 * @param sortedSingletons
 *          the sorted list of singletons
 * @throws IOException
 * @throws InterruptedException
 */
private void writeSingletonsDistribution(List<Integer> sortedSingletons)
        throws IOException, InterruptedException {
    int end = Math.min(numberOfMappers, sortedSingletons.size());

    Text mapperId = new Text();
    Text assignedItems = new Text();

    // Round robin assignment
    for (int ix = 0; ix < end; ix++) {
        StringBuilder sb = new StringBuilder();
        for (int ix1 = ix; ix1 < sortedSingletons.size(); ix1 += numberOfMappers) {
            sb.append(sortedSingletons.get(ix1)).append(" ");
        }

        mapperId.set("" + ix);
        assignedItems.set(sb.substring(0, sb.length() - 1));
        mos.write(OSingletonsDistribution, mapperId, assignedItems);
    }
}

From source file: boa.datagen.SeqSortMerge.java

License: Apache License

private static void read(Reader reader, Text key, BytesWritable val) {
    try {
        if (reader.next(key, val))
            return;
    } catch (Throwable t) {
        t.printStackTrace();
    }
    // no more records (or the read failed): clear the key to mark this reader as exhausted
    key.set("");
}

From source file: cascading.scheme.hadoop.TextDelimited.java

License: Open Source License

protected void writeHeader(SinkCall<Object[], OutputCollector> sinkCall) throws IOException {
    Fields fields = sinkCall.getOutgoingEntry().getFields();

    Text text = (Text) sinkCall.getContext()[0];
    StringBuilder line = (StringBuilder) sinkCall.getContext()[1];
    Charset charset = (Charset) sinkCall.getContext()[2];

    line = (StringBuilder) delimitedParser.joinFirstLine(fields, line);

    text.set(line.toString().getBytes(charset));

    sinkCall.getOutput().collect(null, text);

    line.setLength(0);
}

From source file: cascading.scheme.hadoop.TextDelimited.java

License: Open Source License

@Override
public void sink(FlowProcess<? extends Configuration> flowProcess, SinkCall<Object[], OutputCollector> sinkCall)
        throws IOException {
    TupleEntry tupleEntry = sinkCall.getOutgoingEntry();

    Text text = (Text) sinkCall.getContext()[0];
    StringBuilder line = (StringBuilder) sinkCall.getContext()[1];
    Charset charset = (Charset) sinkCall.getContext()[2];

    Iterable<String> strings = tupleEntry.asIterableOf(String.class);

    line = (StringBuilder) delimitedParser.joinLine(strings, line);

    text.set(line.toString().getBytes(charset));

    sinkCall.getOutput().collect(null, text);

    line.setLength(0);
}

From source file: cascading.scheme.hadoop.TextLine.java

License: Open Source License

@Override
public void sink(FlowProcess<? extends Configuration> flowProcess, SinkCall<Object[], OutputCollector> sinkCall)
        throws IOException {
    Text text = (Text) sinkCall.getContext()[0];
    Charset charset = (Charset) sinkCall.getContext()[1];
    String line = sinkCall.getOutgoingEntry().getTuple().toString();

    text.set(line.getBytes(charset));

    // it's ok to use NULL here so the collector does not write anything
    sinkCall.getOutput().collect(null, text);
}

From source file: cn.com.diditaxi.hive.cf.UDFSysDate.java

License: Apache License

public Text evaluate(Text format) {
    // guard against a null format by falling back to a default pattern
    String pattern = (format == null) ? "yyyy-MM-dd HH:mm:ss" : format.toString();

    Date date = new Date();
    formatter.applyPattern(pattern);
    result.set(formatter.format(date));
    return result;
}

From source file: cn.com.diditaxi.hive.cf.UDFSysDate.java

License: Apache License

public Text evaluate(Text format, IntWritable days) {
    // guard against a null format by falling back to a default pattern
    String pattern = (format == null) ? "yyyy-MM-dd HH:mm:ss" : format.toString();

    formatter.applyPattern(pattern);
    Date date = new Date();
    calendar.setTime(date);
    calendar.add(Calendar.DAY_OF_MONTH, days.get());
    Date newDate = calendar.getTime();
    result.set(formatter.format(newDate));
    return result;
}

From source file: cn.com.warlock.SequenceFilesTest.java

License: Apache License

public static void main(String[] args) throws IOException {
    String hdfsUri = "hdfs://hlg-2p238-fandongsheng:8020";
    String pathStr = "/tmp/example/seq1";
    String compressType = "1";

    // On Windows, hadoop.home.dir may need to be set explicitly, e.g.:
    // System.setProperty("hadoop.home.dir", "E:\\tools");

    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", hdfsUri);
    Path path = new Path(pathStr);

    IntWritable key = new IntWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        SequenceFile.Writer.Option pathOpt = SequenceFile.Writer.file(path);
        SequenceFile.Writer.Option keyClassOpt = SequenceFile.Writer.keyClass(key.getClass());
        SequenceFile.Writer.Option valueClassOpt = SequenceFile.Writer.valueClass(value.getClass());
        SequenceFile.Writer.Option compressionOpt = null;

        // compress type
        if (compressType.equals("1")) {
            System.out.println("compress none");
            compressionOpt = SequenceFile.Writer.compression(CompressionType.NONE);
        } else if (compressType.equals("2")) {
            System.out.println("compress record");
            compressionOpt = SequenceFile.Writer.compression(CompressionType.RECORD);
        } else if (compressType.equals("3")) {
            System.out.println("compress block");
            compressionOpt = SequenceFile.Writer.compression(CompressionType.BLOCK);
        } else {
            System.out.println("Default : compress none");
            compressionOpt = SequenceFile.Writer.compression(CompressionType.NONE);
        }

        writer = SequenceFile.createWriter(conf, pathOpt, keyClassOpt, valueClassOpt, compressionOpt);

        for (int i = 0; i < 100; i++) {
            key.set(100 - i);
            value.set(DATA[i % DATA.length]);
            System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
            writer.append(key, value);

        }
    } finally {
        IOUtils.closeStream(writer);
    }
}

From source file: cn.lhfei.hadoop.ch04.MapFileWriteDemo.java

License: Apache License

public static void main(String[] args) {
    String uri = args[0];
    Configuration conf = new Configuration();
    FileSystem fs = null;

    IntWritable key = new IntWritable();
    Text value = new Text();
    MapFile.Writer writer = null;
    try {
        fs = FileSystem.get(URI.create(uri), conf);
        /*writer = new MapFile.Writer(conf, fs, uri, key.getClass(),
              value.getClass());*/

        writer = new MapFile.Writer(conf, new Path(uri), Writer.keyClass(key.getClass()),
                Writer.valueClass(value.getClass()));

        for (int i = 0; i < 1024; i++) {
            key.set(i + 1);
            value.set(DATA[i % DATA.length]);
            writer.append(key, value);
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(writer);
    }
}