Example usage for org.apache.hadoop.io.Text.set

Introduction

On this page you can find example usage of org.apache.hadoop.io.Text.set.

Prototype

public void set(Text other) 

Document

Copy the contents of another Text.
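
Besides set(Text other), the Text class also provides overloads such as set(String string) and set(byte[] utf8), which most of the examples below use. A minimal, self-contained sketch (the class name TextSetExample and the literal values are illustrative, not taken from any of the sources below):

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.Text;

public class TextSetExample {
    public static void main(String[] args) {
        Text source = new Text("hello");
        Text copy = new Text();

        // set(Text other): copy the contents of another Text into this object
        copy.set(source);

        // set(String string): replace this object's contents with the string's UTF-8 bytes
        copy.set("world");

        // set(byte[] utf8): replace this object's contents with the given UTF-8 bytes
        copy.set("hadoop".getBytes(StandardCharsets.UTF_8));

        System.out.println(copy); // prints "hadoop"
    }
}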

Usage

From source file:org.apache.sqoop.connector.hdfs.TestHdfsBase.java

License:Apache License

protected void createSequenceInput(String indir, Class<? extends CompressionCodec> clz, int numberOfFiles,
        int numberOfRows) throws IOException, InstantiationException, IllegalAccessException {
    Configuration conf = new Configuration();

    CompressionCodec codec = null;
    if (clz != null) {
        codec = clz.newInstance();
        if (codec instanceof Configurable) {
            ((Configurable) codec).setConf(conf);
        }
    }

    int index = 1;
    for (int fi = 0; fi < numberOfFiles; fi++) {
        Path filepath = new Path(indir, UUID.randomUUID() + ".seq");
        SequenceFile.Writer filewriter;
        if (codec != null) {
            filewriter = SequenceFile.createWriter(filepath.getFileSystem(conf), conf, filepath, Text.class,
                    NullWritable.class, SequenceFile.CompressionType.BLOCK, codec);
        } else {
            filewriter = SequenceFile.createWriter(filepath.getFileSystem(conf), conf, filepath, Text.class,
                    NullWritable.class, SequenceFile.CompressionType.NONE);
        }

        Text text = new Text();
        for (int ri = 0; ri < numberOfRows; ri++) {
            String row = index + "," + (double) index + ",'" + index + "'";
            text.set(row);
            filewriter.append(text, NullWritable.get());
            index++;
        }

        filewriter.close();
    }
}

From source file:org.apache.sqoop.mapreduce.db.netezza.NetezzaExternalTableImportMapper.java

License:Apache License

public void map(Integer dataSliceId, NullWritable val, Context context)
        throws IOException, InterruptedException {
    conf = context.getConfiguration();
    dbc = new DBConfiguration(conf);
    numMappers = ConfigurationHelper.getConfNumMaps(conf);
    char rd = (char) conf.getInt(DelimiterSet.OUTPUT_RECORD_DELIM_KEY, '\n');
    initNetezzaExternalTableImport(dataSliceId);
    counter = new PerfCounters();
    counter.startClock();
    Text outputRecord = new Text();
    if (extTableThread.isAlive()) {
        try {
            String inputRecord = recordReader.readLine();
            while (inputRecord != null) {
                if (Thread.interrupted()) {
                    if (!extTableThread.isAlive()) {
                        break;
                    }
                }
                outputRecord.set(inputRecord + rd);
                // Maybe we should set the output to be String for faster performance;
                // there is no real benefit in changing it to Text and then
                // converting it back in our case.
                writeRecord(outputRecord, context);
                counter.addBytes(1 + inputRecord.length());
                inputRecord = recordReader.readLine();
            }
        } finally {
            recordReader.close();
            extTableThread.join();
            counter.stopClock();
            LOG.info("Transferred " + counter.toString());
            if (extTableThread.hasExceptions()) {
                extTableThread.printException();
                throw new IOException(extTableThread.getException());
            }
        }
    }
}

From source file:org.apache.sqoop.mapreduce.hcat.SqoopHCatExportHelper.java

License:Apache License

public SqoopRecord convertToSqoopRecord(HCatRecord hcr) throws IOException {
    Text key = new Text();
    for (Map.Entry<String, Object> e : sqoopRecord.getFieldMap().entrySet()) {
        String colName = e.getKey();
        String hfn = colName.toLowerCase();
        key.set(hfn);
        String javaColType = colTypesJava.get(key).toString();
        int sqlType = ((IntWritable) colTypesSql.get(key)).get();
        HCatFieldSchema field = hCatFullTableSchema.get(hfn);
        HCatFieldSchema.Type fieldType = field.getType();
        Object hCatVal = hcr.get(hfn, hCatFullTableSchema);
        String hCatTypeString = field.getTypeString();
        Object sqlVal = convertToSqoop(hCatVal, fieldType, javaColType, hCatTypeString);
        if (debugHCatExportMapper) {
            LOG.debug("hCatVal " + hCatVal + " of type "
                    + (hCatVal == null ? null : hCatVal.getClass().getName()) + ",sqlVal " + sqlVal
                    + " of type " + (sqlVal == null ? null : sqlVal.getClass().getName()) + ",java type "
                    + javaColType + ", sql type = " + SqoopHCatUtilities.sqlTypeString(sqlType));
        }
        sqoopRecord.setField(colName, sqlVal);
    }
    return sqoopRecord;
}

From source file:org.apache.tez.mapreduce.examples.processor.FilterByWordInputProcessor.java

License:Apache License

@Override
public void run(Map<String, LogicalInput> inputs, Map<String, LogicalOutput> outputs) throws Exception {

    if (inputs.size() != 1) {
        throw new IllegalStateException(
                "FilterByWordInputProcessor processor can only work with a single input");
    }

    if (outputs.size() != 1) {
        throw new IllegalStateException(
                "FilterByWordInputProcessor processor can only work with a single output");
    }

    for (LogicalInput input : inputs.values()) {
        input.start();
    }
    for (LogicalOutput output : outputs.values()) {
        output.start();
    }

    LogicalInput li = inputs.values().iterator().next();
    if (!(li instanceof MRInput)) {
        throw new IllegalStateException("FilterByWordInputProcessor processor can only work with MRInput");
    }

    LogicalOutput lo = outputs.values().iterator().next();
    if (!(lo instanceof UnorderedKVOutput)) {
        throw new IllegalStateException(
                "FilterByWordInputProcessor processor can only work with OnFileUnorderedKVOutput");
    }

    MRInputLegacy mrInput = (MRInputLegacy) li;
    mrInput.init();
    UnorderedKVOutput kvOutput = (UnorderedKVOutput) lo;

    Configuration updatedConf = mrInput.getConfigUpdates();
    Text srcFile = new Text();
    srcFile.set("UNKNOWN_FILENAME_IN_PROCESSOR");
    if (updatedConf != null) {
        String fileName = updatedConf.get(MRJobConfig.MAP_INPUT_FILE);
        if (fileName != null) {
            LOG.info("Processing file: " + fileName);
            srcFile.set(fileName);
        }
    }

    KeyValueReader kvReader = mrInput.getReader();
    KeyValueWriter kvWriter = kvOutput.getWriter();

    while (kvReader.next()) {
        Object key = kvReader.getCurrentKey();
        Object val = kvReader.getCurrentValue();

        Text valText = (Text) val;
        String readVal = valText.toString();
        if (readVal.contains(filterWord)) {
            LongWritable lineNum = (LongWritable) key;
            TextLongPair outVal = new TextLongPair(srcFile, lineNum);
            kvWriter.write(valText, outVal);
        }
    }
}

From source file:org.apache.tez.mapreduce.input.TestMultiMRInput.java

License:Apache License

public static LinkedHashMap<LongWritable, Text> createInputData(FileSystem fs, Path workDir, JobConf job,
        String filename, long startKey, long numKeys) throws IOException {
    LinkedHashMap<LongWritable, Text> data = new LinkedHashMap<LongWritable, Text>();
    Path file = new Path(workDir, filename);
    LOG.info("Generating data at path: " + file);
    // create a file with length entries
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, file, LongWritable.class, Text.class);
    try {
        Random r = new Random(System.currentTimeMillis());
        LongWritable key = new LongWritable();
        Text value = new Text();
        for (long i = startKey; i < numKeys; i++) {
            key.set(i);
            value.set(Integer.toString(r.nextInt(10000)));
            data.put(new LongWritable(key.get()), new Text(value.toString()));
            writer.append(key, value);
            LOG.info("<k, v> : <" + key.get() + ", " + value + ">");
        }
    } finally {
        writer.close();
    }
    return data;
}

From source file:org.apache.tez.mapreduce.processor.MapUtils.java

License:Apache License

private static InputSplit createInputSplit(FileSystem fs, Path workDir, JobConf job, Path file)
        throws IOException {
    FileInputFormat.setInputPaths(job, workDir);

    LOG.info("Generating data at path: " + file);
    // create a file with length entries
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, file, LongWritable.class, Text.class);
    try {
        Random r = new Random(System.currentTimeMillis());
        LongWritable key = new LongWritable();
        Text value = new Text();
        for (int i = 10; i > 0; i--) {
            key.set(r.nextInt(1000));
            value.set(Integer.toString(i));
            writer.append(key, value);
            LOG.info("<k, v> : <" + key.get() + ", " + value + ">");
        }
    } finally {
        writer.close();
    }

    SequenceFileInputFormat<LongWritable, Text> format = new SequenceFileInputFormat<LongWritable, Text>();
    InputSplit[] splits = format.getSplits(job, 1);
    System.err.println("#split = " + splits.length + " ; " + "#locs = " + splits[0].getLocations().length + "; "
            + "loc = " + splits[0].getLocations()[0] + "; " + "off = " + splits[0].getLength() + "; "
            + "file = " + ((FileSplit) splits[0]).getPath());
    return splits[0];
}

From source file:org.apache.tez.processor.FilterByWordInputProcessor.java

License:Apache License

@Override
public void run(Map<String, LogicalInput> inputs, Map<String, LogicalOutput> outputs) throws Exception {

    if (inputs.size() != 1) {
        throw new IllegalStateException(
                "FilterByWordInputProcessor processor can only work with a single input");
    }

    if (outputs.size() != 1) {
        throw new IllegalStateException(
                "FilterByWordInputProcessor processor can only work with a single output");
    }

    LogicalInput li = inputs.values().iterator().next();
    if (!(li instanceof MRInput)) {
        throw new IllegalStateException("FilterByWordInputProcessor processor can only work with MRInput");
    }

    LogicalOutput lo = outputs.values().iterator().next();
    if (!(lo instanceof OnFileUnorderedKVOutput)) {
        throw new IllegalStateException(
                "FilterByWordInputProcessor processor can only work with OnFileUnorderedKVOutput");
    }

    MRInputLegacy mrInput = (MRInputLegacy) li;
    mrInput.init();
    OnFileUnorderedKVOutput kvOutput = (OnFileUnorderedKVOutput) lo;

    Configuration updatedConf = mrInput.getConfigUpdates();
    Text srcFile = new Text();
    srcFile.set("UNKNOWN_FILENAME_IN_PROCESSOR");
    if (updatedConf != null) {
        String fileName = updatedConf.get(MRJobConfig.MAP_INPUT_FILE);
        if (fileName != null) {
            LOG.info("Processing file: " + fileName);
            srcFile.set(fileName);
        }
    }

    KeyValueReader kvReader = mrInput.getReader();
    KeyValueWriter kvWriter = kvOutput.getWriter();

    while (kvReader.next()) {
        Object key = kvReader.getCurrentKey();
        Object val = kvReader.getCurrentValue();

        Text valText = (Text) val;
        String readVal = valText.toString();
        if (readVal.contains(filterWord)) {
            LongWritable lineNum = (LongWritable) key;
            TextLongPair outVal = new TextLongPair(srcFile, lineNum);
            kvWriter.write(valText, outVal);
        }
    }
}

From source file:org.apache.tez.runtime.library.common.writers.TestUnorderedPartitionedKVWriter.java

License:Apache License

public void textTest(int numRegularRecords, int numPartitions, long availableMemory, int numLargeKeys,
        int numLargevalues, int numLargeKvPairs) throws IOException, InterruptedException {
    Partitioner partitioner = new HashPartitioner();
    ApplicationId appId = ApplicationId.newInstance(10000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId);
    Random random = new Random();

    Configuration conf = createConfiguration(outputContext, Text.class, Text.class, shouldCompress, -1,
            HashPartitioner.class);
    CompressionCodec codec = null;
    if (shouldCompress) {
        codec = new DefaultCodec();
        ((Configurable) codec).setConf(conf);
    }

    int numRecordsWritten = 0;

    Map<Integer, Multimap<String, String>> expectedValues = new HashMap<Integer, Multimap<String, String>>();
    for (int i = 0; i < numPartitions; i++) {
        expectedValues.put(i, LinkedListMultimap.<String, String>create());
    }

    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf,
            numPartitions, availableMemory);

    int sizePerBuffer = kvWriter.sizePerBuffer;

    BitSet partitionsWithData = new BitSet(numPartitions);
    Text keyText = new Text();
    Text valText = new Text();
    for (int i = 0; i < numRegularRecords; i++) {
        String key = createRandomString(Math.abs(random.nextInt(10)));
        String val = createRandomString(Math.abs(random.nextInt(20)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }

    // Write Large key records
    for (int i = 0; i < numLargeKeys; i++) {
        String key = createRandomString(sizePerBuffer + Math.abs(random.nextInt(100)));
        String val = createRandomString(Math.abs(random.nextInt(20)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }

    // Write Large val records
    for (int i = 0; i < numLargevalues; i++) {
        String key = createRandomString(Math.abs(random.nextInt(10)));
        String val = createRandomString(sizePerBuffer + Math.abs(random.nextInt(100)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }

    // Write records where key + val are large (but both can fit in the buffer individually)
    for (int i = 0; i < numLargeKvPairs; i++) {
        String key = createRandomString(sizePerBuffer / 2 + Math.abs(random.nextInt(100)));
        String val = createRandomString(sizePerBuffer / 2 + Math.abs(random.nextInt(100)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }

    List<Event> events = kvWriter.close();
    verify(outputContext, never()).fatalError(any(Throwable.class), any(String.class));

    TezCounter outputLargeRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_LARGE_RECORDS);
    assertEquals(numLargeKeys + numLargevalues + numLargeKvPairs, outputLargeRecordsCounter.getValue());

    // Validate the event
    assertEquals(1, events.size());
    assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(0);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numPartitions, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto
            .parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    assertFalse(eventProto.hasData());
    BitSet emptyPartitionBits = null;
    if (partitionsWithData.cardinality() != numPartitions) {
        assertTrue(eventProto.hasEmptyPartitions());
        byte[] emptyPartitions = TezCommonUtils
                .decompressByteStringToByteArray(eventProto.getEmptyPartitions());
        emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
        assertEquals(numPartitions - partitionsWithData.cardinality(), emptyPartitionBits.cardinality());
    } else {
        assertFalse(eventProto.hasEmptyPartitions());
        emptyPartitionBits = new BitSet(numPartitions);
    }
    assertEquals(HOST_STRING, eventProto.getHost());
    assertEquals(SHUFFLE_PORT, eventProto.getPort());
    assertEquals(uniqueId, eventProto.getPathComponent());

    // Verify the actual data
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId);
    Path outputFilePath = kvWriter.finalOutPath;
    Path spillFilePath = kvWriter.finalIndexPath;
    if (numRecordsWritten > 0) {
        assertTrue(localFs.exists(outputFilePath));
        assertTrue(localFs.exists(spillFilePath));
    } else {
        return;
    }

    // Special case for 0 records.
    TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    Text keyDeser = new Text();
    Text valDeser = new Text();
    for (int i = 0; i < numPartitions; i++) {
        if (emptyPartitionBits.get(i)) {
            continue;
        }
        TezIndexRecord indexRecord = spillRecord.getIndex(i);
        FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
        inStream.seek(indexRecord.getStartOffset());
        IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false,
                0, -1);
        while (reader.nextRawKey(keyBuffer)) {
            reader.nextRawValue(valBuffer);
            keyDeser.readFields(keyBuffer);
            valDeser.readFields(valBuffer);
            int partition = partitioner.getPartition(keyDeser, valDeser, numPartitions);
            assertTrue(expectedValues.get(partition).remove(keyDeser.toString(), valDeser.toString()));
        }
        inStream.close();
    }
    for (int i = 0; i < numPartitions; i++) {
        assertEquals(0, expectedValues.get(i).size());
        expectedValues.remove(i);
    }
    assertEquals(0, expectedValues.size());
}

From source file:org.archive.giraph.InDegreeCountComputationVertexWithTextValue.java

License:Apache License

@Override
public void compute(Vertex<LongWritable, Text, Text> vertex, Iterable<Text> messages) {
    if (getSuperstep() == 0) {
        Iterable<Edge<LongWritable, Text>> edges = vertex.getEdges();
        for (Edge<LongWritable, Text> edge : edges) {
            sendMessage(edge.getTargetVertexId(), new Text("1"));
        }
    } else {
        long sum = 0;
        for (Text message : messages) {
            sum++;
        }
        Text vertexValue = vertex.getValue();
        vertexValue.set(Double.toString(sum));
        vertex.setValue(vertexValue);
        vertex.voteToHalt();
    }
}

From source file:org.archive.giraph.InDegreeCountVertexWithTextValue.java

License:Apache License

@Override
public void compute(Iterable<Text> messages) {
    if (getSuperstep() == 0) {
        Iterable<Edge<LongWritable, Text>> edges = getEdges();
        for (Edge<LongWritable, Text> edge : edges) {
            sendMessage(edge.getTargetVertexId(), new Text("1"));
        }
    } else {
        long sum = 0;
        for (Text message : messages) {
            sum++;
        }
        Text vertexValue = getValue();
        vertexValue.set(Double.toString(sum));
        setValue(vertexValue);
        voteToHalt();
    }
}