Example usage for org.apache.hadoop.io Text set

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text.set.

Prototype

public void set(Text other) 

Document

Copies the contents of another Text into this one.
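
Before the examples, here is a minimal, self-contained sketch (not taken from the sources below) showing the copy semantics of set(Text other): the argument's bytes are copied into this instance, so the two objects share no state afterwards.

import org.apache.hadoop.io.Text;

public class TextSetExample {
    public static void main(String[] args) {
        Text source = new Text("hello");
        Text copy = new Text();
        copy.set(source);      // copies source's bytes into copy
        source.set("changed"); // mutating source afterwards does not affect copy
        System.out.println(copy); // prints "hello"
    }
}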

Usage

From source file:org.apache.camel.component.hdfs2.HdfsConsumerTest.java

License:Apache License

@Test
public void testReadString() throws Exception {
    if (!canTest()) {
        return;
    }

    final Path file = new Path(new File("target/test/test-camel-string").getAbsolutePath());
    Configuration conf = new Configuration();
    SequenceFile.Writer writer = createWriter(conf, file, NullWritable.class, Text.class);
    NullWritable keyWritable = NullWritable.get();
    Text valueWritable = new Text();
    String value = "CIAO!";
    valueWritable.set(value);
    writer.append(keyWritable, valueWritable);
    writer.sync();
    writer.close();

    MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
    resultEndpoint.expectedMessageCount(1);

    context.addRoutes(new RouteBuilder() {
        public void configure() {
            from("hdfs2:///" + file.toUri() + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0")
                    .to("mock:result");
        }
    });
    context.start();

    resultEndpoint.assertIsSatisfied();
}

From source file:org.apache.camel.component.hdfs2.HdfsConsumerTest.java

License:Apache License

@Test
public void testReadStringArrayFile() throws Exception {
    if (!canTest()) {
        return;
    }

    final Path file = new Path(new File("target/test/test-camel-string").getAbsolutePath());
    Configuration conf = new Configuration();
    FileSystem fs1 = FileSystem.get(file.toUri(), conf);
    ArrayFile.Writer writer = new ArrayFile.Writer(conf, fs1, "target/test/test-camel-string1", Text.class,
            CompressionType.NONE, new Progressable() {
                @Override
                public void progress() {
                }
            });
    Text valueWritable = new Text();
    String value = "CIAO!";
    valueWritable.set(value);
    writer.append(valueWritable);
    writer.close();

    MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
    resultEndpoint.expectedMessageCount(1);

    context.addRoutes(new RouteBuilder() {
        public void configure() {
            from("hdfs2:///" + file.getParent().toUri()
                    + "?fileSystemType=LOCAL&fileType=ARRAY_FILE&initialDelay=0").to("mock:result");
        }
    });
    context.start();

    resultEndpoint.assertIsSatisfied();
}

From source file:org.apache.camel.itest.osgi.hdfs.HdfsRouteTest.java

License:Apache License

@Test
public void testReadString() throws Exception {
    if (SKIP) {
        return;
    }

    final Path file = new Path(new File("../../../../target/test/test-camel-string").getAbsolutePath());
    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    //conf.setClassLoader(this.getClass().getClassLoader());
    // add the default configure into the resource
    conf.addResource(HdfsRouteTest.class.getResourceAsStream("/core-default.xml"));
    FileSystem fs1 = FileSystem.get(file.toUri(), conf);
    SequenceFile.Writer writer = createWriter(fs1, conf, file, NullWritable.class, Text.class);
    NullWritable keyWritable = NullWritable.get();
    Text valueWritable = new Text();
    String value = "CIAO!";
    valueWritable.set(value);
    writer.append(keyWritable, valueWritable);
    writer.sync();
    writer.close();

    context.addRoutes(new RouteBuilder() {
        public void configure() {
            from("hdfs:///" + file.toUri() + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0")
                    .to("mock:result");
        }
    });
    context.start();

    MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
    resultEndpoint.expectedMessageCount(1);
    resultEndpoint.assertIsSatisfied();
}

From source file:org.apache.crunch.io.text.csv.CSVLineReader.java

License:Apache License

/**
 * This method will read through one full CSV record, place its content into
 * the input Text and return the number of bytes (including newline
 * characters) that were consumed.
 * 
 * @param input
 *          a mutable {@link Text} object into which the text of the CSV
 *          record will be stored, without any line feeds or carriage returns
 * @return the number of bytes that were read, including any control
 *         characters, line feeds, or carriage returns.
 * @throws IOException
 *           if an IOException occurs while handling the file to be read
 */
public int readCSVLine(final Text input) throws IOException {
    Preconditions.checkNotNull(input, "input may not be null");
    inputText = new Text(input);
    long totalBytesConsumed = 0;
    if (endOfFile) {
        return 0;
    }
    if (inMultiLine) {
        throw new RuntimeException(
                "Cannot begin reading a CSV record while inside of a multi-line CSV record.");
    }

    final StringBuilder stringBuilder = new StringBuilder();
    do {
        // Read a line from the file and add it to the builder
        inputText.clear();
        totalBytesConsumed += readFileLine(inputText);
        stringBuilder.append(inputText.toString());

        if (currentlyInQuotes && !endOfFile) {
            // If we end up in a multi-line record, we need to append a newline
            stringBuilder.append('\n');

            // Do a check on the total bytes consumed to see if something has gone
            // wrong.
            if (totalBytesConsumed > maximumRecordSize || totalBytesConsumed > Integer.MAX_VALUE) {
                final String record = stringBuilder.toString();
                LOGGER.error("Possibly malformed file encountered. First line of record: "
                        + record.substring(0, record.indexOf('\n')));
                throw new IOException(
                        "Possibly malformed file encountered. Check log statements for more information");
            }
        }
    } while (currentlyInQuotes && !endOfFile);

    // Set the input to the multi-line record
    input.set(stringBuilder.toString());
    return (int) totalBytesConsumed;
}

From source file:org.apache.crunch.io.text.csv.CSVLineReader.java

License:Apache License

/**
 * A method for reading through one single line in the CSV file, that is, it
 * will read until the first line feed, carriage return, or set of both is
 * found. The CSV parsing logic markers are maintained outside of this method
 * to enable manipulation of that logic in order to find the beginning of a
 * CSV record. Use {@link CSVLineReader#isInMultiLine()} and
 * {@link CSVLineReader#resetMultiLine()} to do so. See
 * {@link CSVInputFormat#getSplitsForFile(long, long, org.apache.hadoop.fs.Path, org.apache.hadoop.fs.FSDataInputStream)}
 * for an example.
 * 
 * @param input
 *          a mutable {@link Text} object into which the text of the line will
 *          be stored, without any line feeds or carriage returns
 * @return the number of bytes that were read, including any control
 *         characters, line feeds, or carriage returns.
 * @throws IOException
 *           if an IOException occurs while handling the file to be read
 */
public int readFileLine(final Text input) throws IOException {
    Preconditions.checkNotNull(input, "input may not be null");
    if (endOfFile) {
        return 0;
    }

    // This integer keeps track of the number of newline characters used to
    // terminate the line being read. This could be 1, in the case of LF or CR,
    // or 2, in the case of CRLF.
    int newlineLength = 0;
    int inputTextLength = 0;
    long bytesConsumed = 0;
    int readTextLength = 0;
    int startPosition = bufferPosition;
    endOfLineReached = false;
    inputText = new Text(input);

    do {
        boolean checkForLF = false;
        // Figure out where we are in the buffer and fill it if necessary.
        if (bufferPosition >= bufferLength) {
            refillBuffer();
            startPosition = bufferPosition;
            if (endOfFile) {
                break;
            }
        }

        newlineLength = 0;
        // Iterate through the buffer looking for newline characters while keeping
        // track of if we're in a field and/or in quotes.
        for (; bufferPosition < bufferLength; ++bufferPosition) {
            bytesConsumed += calculateCharacterByteLength(buffer[bufferPosition]);
            if (buffer[bufferPosition] == this.escape) {
                if (isNextCharacterEscapable(currentlyInQuotes, bufferPosition)) {
                    // checks to see if we are in quotes and if the next character is a
                    // quote or an escape
                    // character. If so, that's fine. Record the next character's size
                    // and skip it.
                    ++bufferPosition;
                    bytesConsumed += calculateCharacterByteLength(buffer[bufferPosition]);
                }
            } else if (buffer[bufferPosition] == openQuoteChar || buffer[bufferPosition] == closeQuoteChar) {
                // toggle currentlyInQuotes if we've hit a non-escaped quote character
                currentlyInQuotes = !currentlyInQuotes;
            } else if (buffer[bufferPosition] == LF || buffer[bufferPosition] == CR) {
                boolean lastCharWasCR = buffer[bufferPosition] == CR;
                // Line is over; make a note and increment the newlineLength
                // counter.
                endOfLineReached = true;
                ++newlineLength;
                ++bufferPosition;
                if (lastCharWasCR && buffer[bufferPosition] == LF) {
                    lastCharWasCR = false;
                    // Check for LF (in case of CRLF line endings) and increment the
                    // counter, skip it by moving the buffer position, then record the
                    // length of the LF.
                    ++newlineLength;
                    ++bufferPosition;
                    bytesConsumed += calculateCharacterByteLength(buffer[bufferPosition]);
                } else if (lastCharWasCR && bufferPosition >= bufferLength) {
                    // We just read a CR at the very end of the buffer. If this is a
                    // file with CRLF line endings, there will be a LF next that we need
                    // to check for and account for in bytesRead before we count this
                    // line as "read".
                    checkForLF = true;
                }
                break;
            }
        }
        // This is the length of the actual text in the line, excluding the newline characters.
        readTextLength = bufferPosition - startPosition - newlineLength;

        // Append the results.
        if (readTextLength > Integer.MAX_VALUE - inputTextLength) {
            readTextLength = Integer.MAX_VALUE - inputTextLength;
        }
        if (readTextLength > 0) {
            // This will append the portion of the buffer containing only the
            // important text, omitting any newline characters
            inputText.set(new StringBuilder().append(inputText.toString())
                    .append(new String(buffer, startPosition, readTextLength)).toString());
            inputTextLength += readTextLength;
        }

        // If the last character we read was a CR at the end of the buffer, we
        // need to check for an LF after a buffer refill.
        if (checkForLF) {
            refillBuffer();
            if (endOfFile) {
                break;
            }
            if (buffer[bufferPosition] == LF) {
                bytesConsumed += calculateCharacterByteLength(buffer[bufferPosition]);
                ++bufferPosition;
                ++newlineLength;
            }
        }

    } while (newlineLength == 0 && bytesConsumed < Integer.MAX_VALUE);

    if (endOfLineReached) {
        // Ending the line inside quotes means the record continues on the
        // next physical line.
        inMultiLine = currentlyInQuotes;
    }

    if (bytesConsumed > Integer.MAX_VALUE) {
        throw new IOException("Too many bytes consumed before newline: " + Integer.MAX_VALUE);
    }

    input.set(inputText);
    return (int) bytesConsumed;
}

From source file:org.apache.flink.test.hadoop.mapred.HadoopIOFormatsITCase.java

License:Apache License

@Override
protected void preSubmit() throws Exception {
    resultPath = new String[] { getTempDirPath("result0"), getTempDirPath("result1") };

    File sequenceFile = createAndRegisterTempFile("seqFile");
    sequenceFileInPath = sequenceFile.toURI().toString();

    // Create a sequence file
    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    FileSystem fs = FileSystem.get(URI.create(sequenceFile.getAbsolutePath()), conf);
    Path path = new Path(sequenceFile.getAbsolutePath());

    //  ------------------ Long / Text Key Value pair: ------------
    int kvCount = 4;

    LongWritable key = new LongWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
        for (int i = 0; i < kvCount; i++) {
            if (i == 1) {
                // write key = 1 a bit more often.
                for (int a = 0; a < 15; a++) {
                    key.set(i);
                    value.set(i + " - somestring");
                    writer.append(key, value);
                }
            }
            key.set(i);
            value.set(i + " - somestring");
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }

    //  ------------------ Null / Long Key Value pair: ------------

    File sequenceFileNull = createAndRegisterTempFile("seqFileNullKey");
    sequenceFileInPathNull = sequenceFileNull.toURI().toString();
    path = new Path(sequenceFileInPathNull);

    LongWritable value1 = new LongWritable();
    SequenceFile.Writer writer1 = null;
    try {
        writer1 = SequenceFile.createWriter(fs, conf, path, NullWritable.class, value1.getClass());
        for (int i = 0; i < kvCount; i++) {
            value1.set(i);
            writer1.append(NullWritable.get(), value1);
        }
    } finally {
        IOUtils.closeStream(writer1);
    }
}
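
As a complement, a minimal hedged sketch (an assumption for illustration, not part of the Flink test) of reading such a sequence file back, assuming the same imports as the snippet above. One key and one value instance are reused across the whole file, mirroring the reuse on the write side.

static void readBack(FileSystem fs, Path path, org.apache.hadoop.conf.Configuration conf) throws IOException {
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    try {
        LongWritable key = new LongWritable();
        Text value = new Text();
        while (reader.next(key, value)) {
            // next(...) refills key and value in place, mirroring set(...) on the write side
            System.out.println(key.get() + " -> " + value);
        }
    } finally {
        IOUtils.closeStream(reader);
    }
}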

From source file:org.apache.giraph.types.ops.TextTypeOps.java

License:Apache License

@Override
public void set(Text to, Text from) {
    // Bound the copy by getLength(): getBytes() exposes the backing array,
    // which may contain stale bytes past the valid length.
    to.set(from.getBytes(), 0, from.getLength());
}

From source file:org.apache.gobblin.runtime.JobState.java

License:Apache License

public void write(DataOutput out, boolean writeTasks, boolean writePreviousWorkUnitStates) throws IOException {
    Text text = new Text();
    text.set(this.jobName);
    text.write(out);
    text.set(this.jobId);
    text.write(out);
    out.writeLong(this.startTime);
    out.writeLong(this.endTime);
    out.writeLong(this.duration);
    text.set(this.state.name());
    text.write(out);
    out.writeInt(this.taskCount);
    if (writeTasks) {
        out.writeInt(this.taskStates.size() + this.skippedTaskStates.size());
        for (TaskState taskState : this.taskStates.values()) {
            taskState.write(out);
        }
        for (TaskState taskState : this.skippedTaskStates.values()) {
            taskState.write(out);
        }
    } else {
        out.writeInt(0);
    }
    super.write(out, writePreviousWorkUnitStates);
}
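
For context, a hedged sketch of the symmetric read side (an assumption for illustration, not Gobblin's actual code), showing how the same reusable Text recovers the fields in write order:

public void readSketch(DataInput in) throws IOException {
    Text text = new Text();
    text.readFields(in);               // jobName, written via text.set(this.jobName)
    String jobName = text.toString();
    text.readFields(in);               // jobId
    String jobId = text.toString();
    long startTime = in.readLong();
    long endTime = in.readLong();
    long duration = in.readLong();
    text.readFields(in);               // state name
    String state = text.toString();
    int taskCount = in.readInt();
    // ... remaining fields mirror write(...) above
}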

From source file:org.apache.hawq.pxf.plugins.json.JsonRecordReader.java

License:Apache License

@Override
public boolean next(LongWritable key, Text value) throws IOException {

    while (pos < end) {

        String json = parser.nextObjectContainingMember(jsonMemberName);
        pos = start + parser.getBytesRead();

        if (json == null) {
            return false;
        }

        long jsonStart = pos - json.length();

        // if the "begin-object" position is after the end of our split, we should ignore it
        if (jsonStart >= end) {
            return false;
        }

        if (json.length() > maxObjectLength) {
            LOG.warn("Skipped JSON object of size " + json.length() + " at pos " + jsonStart);
        } else {
            key.set(jsonStart);
            value.set(json);
            return true;
        }
    }

    return false;
}
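
To make the contract explicit, a short hedged sketch of the loop a caller would drive (reader stands for an initialized JsonRecordReader; processRecord is a hypothetical consumer). The key and value are allocated once and refilled in place by set() on every call.

LongWritable key = new LongWritable();
Text value = new Text();
while (reader.next(key, value)) {
    // next(...) filled value in place via value.set(json)
    processRecord(key.get(), value.toString()); // processRecord is hypothetical
}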

From source file:org.apache.hive.storage.jdbc.JdbcSerDe.java

License:Apache License

@Override
public Object deserialize(Writable blob) throws SerDeException {
    LOGGER.debug("Deserializing from SerDe");
    if (!(blob instanceof MapWritable)) {
        throw new SerDeException("Expected MapWritable. Got " + blob.getClass().getName());
    }

    if ((row == null) || (columnNames == null)) {
        throw new SerDeException("JDBC SerDe hasn't been initialized properly");
    }

    row.clear();
    MapWritable input = (MapWritable) blob;
    Text columnKey = new Text();

    for (int i = 0; i < numColumns; i++) {
        columnKey.set(columnNames.get(i));
        Writable value = input.get(columnKey);
        row.add(value instanceof NullWritable ? null : ((ObjectWritable) value).get());
    }

    return row;
}
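
Note the allocation pattern here: a single columnKey Text is reused for every lookup. set() replaces its contents in place, and because Text provides value-based hashCode and equals, one mutable instance can probe the MapWritable for all columns without creating a new key object per column.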