Example usage for org.apache.hadoop.io.Text.set

Introduction

On this page you can find example usage of org.apache.hadoop.io.Text.set.

Prototype

public void set(Text other) 

Document

Copy the contents of another Text.
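
Besides set(Text other), the Text class also provides overloads such as set(String string) and set(byte[] utf8), which most of the examples below use. A minimal, self-contained sketch (the class name TextSetExample and the literal values are illustrative, not taken from any of the sources below):

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.Text;

public class TextSetExample {
    public static void main(String[] args) {
        Text source = new Text("hello");
        Text copy = new Text();

        // set(Text other): copy the contents of another Text into this object
        copy.set(source);

        // set(String string): replace this object's contents with the string's UTF-8 bytes
        copy.set("world");

        // set(byte[] utf8): replace this object's contents with the given UTF-8 bytes
        copy.set("hadoop".getBytes(StandardCharsets.UTF_8));

        System.out.println(copy); // prints "hadoop"
    }
}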

Usage

From source file:org.apache.sqoop.connector.hdfs.TestHdfsBase.java

License:Apache License

protected void createSequenceInput(String indir, Class<? extends CompressionCodec> clz, int numberOfFiles,
        int numberOfRows) throws IOException, InstantiationException, IllegalAccessException {
    Configuration conf = new Configuration();

    CompressionCodec codec = null;
    if (clz != null) {
        codec = clz.newInstance();
        if (codec instanceof Configurable) {
            ((Configurable) codec).setConf(conf);
        }
    }

    int index = 1;
    for (int fi = 0; fi < numberOfFiles; fi++) {
        Path filepath = new Path(indir, UUID.randomUUID() + ".seq");
        SequenceFile.Writer filewriter;
        if (codec != null) {
            filewriter = SequenceFile.createWriter(filepath.getFileSystem(conf), conf, filepath, Text.class,
                    NullWritable.class, SequenceFile.CompressionType.BLOCK, codec);
        } else {
            filewriter = SequenceFile.createWriter(filepath.getFileSystem(conf), conf, filepath, Text.class,
                    NullWritable.class, SequenceFile.CompressionType.NONE);
        }

        Text text = new Text();
        for (int ri = 0; ri < numberOfRows; ri++) {
            String row = index + "," + (double) index + ",'" + index + "'";
            text.set(row);
            filewriter.append(text, NullWritable.get());
            index++;
        }

        filewriter.close();
    }
}

From source file:org.apache.sqoop.mapreduce.db.netezza.NetezzaExternalTableImportMapper.java

License:Apache License

public void map(Integer dataSliceId, NullWritable val, Context context)
        throws IOException, InterruptedException {
    conf = context.getConfiguration();
    dbc = new DBConfiguration(conf);
    numMappers = ConfigurationHelper.getConfNumMaps(conf);
    char rd = (char) conf.getInt(DelimiterSet.OUTPUT_RECORD_DELIM_KEY, '\n');
    initNetezzaExternalTableImport(dataSliceId);
    counter = new PerfCounters();
    counter.startClock();
    Text outputRecord = new Text();
    if (extTableThread.isAlive()) {
        try {
            String inputRecord = recordReader.readLine();
            while (inputRecord != null) {
                if (Thread.interrupted()) {
                    if (!extTableThread.isAlive()) {
                        break;
                    }
                }
                outputRecord.set(inputRecord + rd);
                // Maybe we should set the output to be String for faster performance;
                // there is no real benefit in changing it to Text and then
                // converting it back in our case.
                writeRecord(outputRecord, context);
                counter.addBytes(1 + inputRecord.length());
                inputRecord = recordReader.readLine();
            }
        } finally {
            recordReader.close();
            extTableThread.join();
            counter.stopClock();
            LOG.info("Transferred " + counter.toString());
            if (extTableThread.hasExceptions()) {
                extTableThread.printException();
                throw new IOException(extTableThread.getException());
            }
        }
    }
}

From source file:org.apache.sqoop.mapreduce.hcat.SqoopHCatExportHelper.java

License:Apache License

public SqoopRecord convertToSqoopRecord(HCatRecord hcr) throws IOException {
    Text key = new Text();
    for (Map.Entry<String, Object> e : sqoopRecord.getFieldMap().entrySet()) {
        String colName = e.getKey();
        String hfn = colName.toLowerCase();
        key.set(hfn);
        String javaColType = colTypesJava.get(key).toString();
        int sqlType = ((IntWritable) colTypesSql.get(key)).get();
        HCatFieldSchema field = hCatFullTableSchema.get(hfn);
        HCatFieldSchema.Type fieldType = field.getType();
        Object hCatVal = hcr.get(hfn, hCatFullTableSchema);
        String hCatTypeString = field.getTypeString();
        Object sqlVal = convertToSqoop(hCatVal, fieldType, javaColType, hCatTypeString);
        if (debugHCatExportMapper) {
            LOG.debug("hCatVal " + hCatVal + " of type "
                    + (hCatVal == null ? null : hCatVal.getClass().getName()) + ",sqlVal " + sqlVal
                    + " of type " + (sqlVal == null ? null : sqlVal.getClass().getName()) + ",java type "
                    + javaColType + ", sql type = " + SqoopHCatUtilities.sqlTypeString(sqlType));
        }
        sqoopRecord.setField(colName, sqlVal);
    }
    return sqoopRecord;
}

From source file:org.apache.tez.mapreduce.examples.processor.FilterByWordInputProcessor.java

License:Apache License

@Override
public void run(Map<String, LogicalInput> inputs, Map<String, LogicalOutput> outputs) throws Exception {

    if (inputs.size() != 1) {
        throw new IllegalStateException(
                "FilterByWordInputProcessor processor can only work with a single input");
    }

    if (outputs.size() != 1) {
        throw new IllegalStateException(
                "FilterByWordInputProcessor processor can only work with a single output");
    }

    for (LogicalInput input : inputs.values()) {
        input.start();
    }
    for (LogicalOutput output : outputs.values()) {
        output.start();
    }

    LogicalInput li = inputs.values().iterator().next();
    if (!(li instanceof MRInput)) {
        throw new IllegalStateException("FilterByWordInputProcessor processor can only work with MRInput");
    }

    LogicalOutput lo = outputs.values().iterator().next();
    if (!(lo instanceof UnorderedKVOutput)) {
        throw new IllegalStateException(
                "FilterByWordInputProcessor processor can only work with OnFileUnorderedKVOutput");
    }

    MRInputLegacy mrInput = (MRInputLegacy) li;
    mrInput.init();
    UnorderedKVOutput kvOutput = (UnorderedKVOutput) lo;

    Configuration updatedConf = mrInput.getConfigUpdates();
    Text srcFile = new Text();
    srcFile.set("UNKNOWN_FILENAME_IN_PROCESSOR");
    if (updatedConf != null) {
        String fileName = updatedConf.get(MRJobConfig.MAP_INPUT_FILE);
        if (fileName != null) {
            LOG.info("Processing file: " + fileName);
            srcFile.set(fileName);
        }
    }

    KeyValueReader kvReader = mrInput.getReader();
    KeyValueWriter kvWriter = kvOutput.getWriter();

    while (kvReader.next()) {
        Object key = kvReader.getCurrentKey();
        Object val = kvReader.getCurrentValue();

        Text valText = (Text) val;
        String readVal = valText.toString();
        if (readVal.contains(filterWord)) {
            LongWritable lineNum = (LongWritable) key;
            TextLongPair outVal = new TextLongPair(srcFile, lineNum);
            kvWriter.write(valText, outVal);
        }
    }
}

From source file:org.apache.tez.mapreduce.input.TestMultiMRInput.java

License:Apache License

public static LinkedHashMap<LongWritable, Text> createInputData(FileSystem fs, Path workDir, JobConf job,
        String filename, long startKey, long numKeys) throws IOException {
    LinkedHashMap<LongWritable, Text> data = new LinkedHashMap<LongWritable, Text>();
    Path file = new Path(workDir, filename);
    LOG.info("Generating data at path: " + file);
    // create a file with length entries
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, file, LongWritable.class, Text.class);
    try {
        Random r = new Random(System.currentTimeMillis());
        LongWritable key = new LongWritable();
        Text value = new Text();
        for (long i = startKey; i < numKeys; i++) {
            key.set(i);
            value.set(Integer.toString(r.nextInt(10000)));
            data.put(new LongWritable(key.get()), new Text(value.toString()));
            writer.append(key, value);
            LOG.info("<k, v> : <" + key.get() + ", " + value + ">");
        }
    } finally {
        writer.close();
    }
    return data;
}

From source file:org.apache.tez.mapreduce.processor.MapUtils.java

License:Apache License

private static InputSplit createInputSplit(FileSystem fs, Path workDir, JobConf job, Path file)
        throws IOException {
    FileInputFormat.setInputPaths(job, workDir);

    LOG.info("Generating data at path: " + file);
    // create a file with length entries
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, file, LongWritable.class, Text.class);
    try {
        Random r = new Random(System.currentTimeMillis());
        LongWritable key = new LongWritable();
        Text value = new Text();
        for (int i = 10; i > 0; i--) {
            key.set(r.nextInt(1000));
            value.set(Integer.toString(i));
            writer.append(key, value);
            LOG.info("<k, v> : <" + key.get() + ", " + value + ">");
        }
    } finally {
        writer.close();
    }

    SequenceFileInputFormat<LongWritable, Text> format = new SequenceFileInputFormat<LongWritable, Text>();
    InputSplit[] splits = format.getSplits(job, 1);
    System.err.println("#split = " + splits.length + " ; " + "#locs = " + splits[0].getLocations().length + "; "
            + "loc = " + splits[0].getLocations()[0] + "; " + "off = " + splits[0].getLength() + "; "
            + "file = " + ((FileSplit) splits[0]).getPath());
    return splits[0];
}

From source file:org.apache.tez.processor.FilterByWordInputProcessor.java

License:Apache License

@Override
public void run(Map<String, LogicalInput> inputs, Map<String, LogicalOutput> outputs) throws Exception {

    if (inputs.size() != 1) {
        throw new IllegalStateException(
                "FilterByWordInputProcessor processor can only work with a single input");
    }

    if (outputs.size() != 1) {
        throw new IllegalStateException(
                "FilterByWordInputProcessor processor can only work with a single output");
    }

    LogicalInput li = inputs.values().iterator().next();
    if (!(li instanceof MRInput)) {
        throw new IllegalStateException("FilterByWordInputProcessor processor can only work with MRInput");
    }

    LogicalOutput lo = outputs.values().iterator().next();
    if (!(lo instanceof OnFileUnorderedKVOutput)) {
        throw new IllegalStateException(
                "FilterByWordInputProcessor processor can only work with OnFileUnorderedKVOutput");
    }

    MRInputLegacy mrInput = (MRInputLegacy) li;
    mrInput.init();
    OnFileUnorderedKVOutput kvOutput = (OnFileUnorderedKVOutput) lo;

    Configuration updatedConf = mrInput.getConfigUpdates();
    Text srcFile = new Text();
    srcFile.set("UNKNOWN_FILENAME_IN_PROCESSOR");
    if (updatedConf != null) {
        String fileName = updatedConf.get(MRJobConfig.MAP_INPUT_FILE);
        if (fileName != null) {
            LOG.info("Processing file: " + fileName);
            srcFile.set(fileName);
        }
    }

    KeyValueReader kvReader = mrInput.getReader();
    KeyValueWriter kvWriter = kvOutput.getWriter();

    while (kvReader.next()) {
        Object key = kvReader.getCurrentKey();
        Object val = kvReader.getCurrentValue();

        Text valText = (Text) val;
        String readVal = valText.toString();
        if (readVal.contains(filterWord)) {
            LongWritable lineNum = (LongWritable) key;
            TextLongPair outVal = new TextLongPair(srcFile, lineNum);
            kvWriter.write(valText, outVal);
        }
    }
}

From source file:org.apache.tez.runtime.library.common.writers.TestUnorderedPartitionedKVWriter.java

License:Apache License

public void textTest(int numRegularRecords, int numPartitions, long availableMemory, int numLargeKeys,
        int numLargevalues, int numLargeKvPairs) throws IOException, InterruptedException {
    Partitioner partitioner = new HashPartitioner();
    ApplicationId appId = ApplicationId.newInstance(10000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId);
    Random random = new Random();

    Configuration conf = createConfiguration(outputContext, Text.class, Text.class, shouldCompress, -1,
            HashPartitioner.class);
    CompressionCodec codec = null;
    if (shouldCompress) {
        codec = new DefaultCodec();
        ((Configurable) codec).setConf(conf);
    }

    int numRecordsWritten = 0;

    Map<Integer, Multimap<String, String>> expectedValues = new HashMap<Integer, Multimap<String, String>>();
    for (int i = 0; i < numPartitions; i++) {
        expectedValues.put(i, LinkedListMultimap.<String, String>create());
    }

    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf,
            numPartitions, availableMemory);

    int sizePerBuffer = kvWriter.sizePerBuffer;

    BitSet partitionsWithData = new BitSet(numPartitions);
    Text keyText = new Text();
    Text valText = new Text();
    for (int i = 0; i < numRegularRecords; i++) {
        String key = createRandomString(Math.abs(random.nextInt(10)));
        String val = createRandomString(Math.abs(random.nextInt(20)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }

    // Write Large key records
    for (int i = 0; i < numLargeKeys; i++) {
        String key = createRandomString(sizePerBuffer + Math.abs(random.nextInt(100)));
        String val = createRandomString(Math.abs(random.nextInt(20)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }

    // Write Large val records
    for (int i = 0; i < numLargevalues; i++) {
        String key = createRandomString(Math.abs(random.nextInt(10)));
        String val = createRandomString(sizePerBuffer + Math.abs(random.nextInt(100)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }

    // Write records where key + val are large (but both can fit in the buffer individually)
    for (int i = 0; i < numLargeKvPairs; i++) {
        String key = createRandomString(sizePerBuffer / 2 + Math.abs(random.nextInt(100)));
        String val = createRandomString(sizePerBuffer / 2 + Math.abs(random.nextInt(100)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }

    List<Event> events = kvWriter.close();
    verify(outputContext, never()).fatalError(any(Throwable.class), any(String.class));

    TezCounter outputLargeRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_LARGE_RECORDS);
    assertEquals(numLargeKeys + numLargevalues + numLargeKvPairs, outputLargeRecordsCounter.getValue());

    // Validate the event
    assertEquals(1, events.size());
    assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(0);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numPartitions, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto
            .parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    assertFalse(eventProto.hasData());
    BitSet emptyPartitionBits = null;
    if (partitionsWithData.cardinality() != numPartitions) {
        assertTrue(eventProto.hasEmptyPartitions());
        byte[] emptyPartitions = TezCommonUtils
                .decompressByteStringToByteArray(eventProto.getEmptyPartitions());
        emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
        assertEquals(numPartitions - partitionsWithData.cardinality(), emptyPartitionBits.cardinality());
    } else {
        assertFalse(eventProto.hasEmptyPartitions());
        emptyPartitionBits = new BitSet(numPartitions);
    }
    assertEquals(HOST_STRING, eventProto.getHost());
    assertEquals(SHUFFLE_PORT, eventProto.getPort());
    assertEquals(uniqueId, eventProto.getPathComponent());

    // Verify the actual data
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId);
    Path outputFilePath = kvWriter.finalOutPath;
    Path spillFilePath = kvWriter.finalIndexPath;
    if (numRecordsWritten > 0) {
        assertTrue(localFs.exists(outputFilePath));
        assertTrue(localFs.exists(spillFilePath));
    } else {
        return;
    }

    // Special case for 0 records.
    TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    Text keyDeser = new Text();
    Text valDeser = new Text();
    for (int i = 0; i < numPartitions; i++) {
        if (emptyPartitionBits.get(i)) {
            continue;
        }
        TezIndexRecord indexRecord = spillRecord.getIndex(i);
        FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
        inStream.seek(indexRecord.getStartOffset());
        IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false,
                0, -1);
        while (reader.nextRawKey(keyBuffer)) {
            reader.nextRawValue(valBuffer);
            keyDeser.readFields(keyBuffer);
            valDeser.readFields(valBuffer);
            int partition = partitioner.getPartition(keyDeser, valDeser, numPartitions);
            assertTrue(expectedValues.get(partition).remove(keyDeser.toString(), valDeser.toString()));
        }
        inStream.close();
    }
    for (int i = 0; i < numPartitions; i++) {
        assertEquals(0, expectedValues.get(i).size());
        expectedValues.remove(i);
    }
    assertEquals(0, expectedValues.size());
}

From source file:org.archive.giraph.InDegreeCountComputationVertexWithTextValue.java

License:Apache License

@Override
public void compute(Vertex<LongWritable, Text, Text> vertex, Iterable<Text> messages) {
    if (getSuperstep() == 0) {
        Iterable<Edge<LongWritable, Text>> edges = vertex.getEdges();
        for (Edge<LongWritable, Text> edge : edges) {
            sendMessage(edge.getTargetVertexId(), new Text("1"));
        }
    } else {
        long sum = 0;
        for (Text message : messages) {
            sum++;
        }
        Text vertexValue = vertex.getValue();
        vertexValue.set(Double.toString(sum));
        vertex.setValue(vertexValue);
        vertex.voteToHalt();
    }
}

From source file:org.archive.giraph.InDegreeCountVertexWithTextValue.java

License:Apache License

@Override
public void compute(Iterable<Text> messages) {
    if (getSuperstep() == 0) {
        Iterable<Edge<LongWritable, Text>> edges = getEdges();
        for (Edge<LongWritable, Text> edge : edges) {
            sendMessage(edge.getTargetVertexId(), new Text("1"));
        }
    } else {
        long sum = 0;
        for (Text message : messages) {
            sum++;
        }
        Text vertexValue = getValue();
        vertexValue.set(Double.toString(sum));
        setValue(vertexValue);
        voteToHalt();
    }
}