Example usage for org.apache.hadoop.io Text set

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text.set.

Prototype

public void set(Text other) 

Document

Copies the contents of another Text into this one.
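
Before the examples, here is a minimal, self-contained sketch (not taken from the sources below) showing the copy semantics of set(Text other): the argument's bytes are copied into this instance, so the two objects share no state afterwards.

import org.apache.hadoop.io.Text;

public class TextSetExample {
    public static void main(String[] args) {
        Text source = new Text("hello");
        Text copy = new Text();
        copy.set(source);      // copies source's bytes into copy
        source.set("changed"); // mutating source afterwards does not affect copy
        System.out.println(copy); // prints "hello"
    }
}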

Usage

From source file:org.apache.camel.component.hdfs2.HdfsConsumerTest.java

License:Apache License

@Test
public void testReadString() throws Exception {
    if (!canTest()) {
        return;
    }

    final Path file = new Path(new File("target/test/test-camel-string").getAbsolutePath());
    Configuration conf = new Configuration();
    SequenceFile.Writer writer = createWriter(conf, file, NullWritable.class, Text.class);
    NullWritable keyWritable = NullWritable.get();
    Text valueWritable = new Text();
    String value = "CIAO!";
    valueWritable.set(value);
    writer.append(keyWritable, valueWritable);
    writer.sync();
    writer.close();

    MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
    resultEndpoint.expectedMessageCount(1);

    context.addRoutes(new RouteBuilder() {
        public void configure() {
            from("hdfs2:///" + file.toUri() + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0")
                    .to("mock:result");
        }
    });
    context.start();

    resultEndpoint.assertIsSatisfied();
}

From source file:org.apache.camel.component.hdfs2.HdfsConsumerTest.java

License:Apache License

@Test
public void testReadStringArrayFile() throws Exception {
    if (!canTest()) {
        return;
    }

    final Path file = new Path(new File("target/test/test-camel-string").getAbsolutePath());
    Configuration conf = new Configuration();
    FileSystem fs1 = FileSystem.get(file.toUri(), conf);
    ArrayFile.Writer writer = new ArrayFile.Writer(conf, fs1, "target/test/test-camel-string1", Text.class,
            CompressionType.NONE, new Progressable() {
                @Override
                public void progress() {
                }
            });
    Text valueWritable = new Text();
    String value = "CIAO!";
    valueWritable.set(value);
    writer.append(valueWritable);
    writer.close();

    MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
    resultEndpoint.expectedMessageCount(1);

    context.addRoutes(new RouteBuilder() {
        public void configure() {
            from("hdfs2:///" + file.getParent().toUri()
                    + "?fileSystemType=LOCAL&fileType=ARRAY_FILE&initialDelay=0").to("mock:result");
        }
    });
    context.start();

    resultEndpoint.assertIsSatisfied();
}

From source file:org.apache.camel.itest.osgi.hdfs.HdfsRouteTest.java

License:Apache License

@Test
public void testReadString() throws Exception {
    if (SKIP) {
        return;
    }

    final Path file = new Path(new File("../../../../target/test/test-camel-string").getAbsolutePath());
    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    //conf.setClassLoader(this.getClass().getClassLoader());
    // add the default configure into the resource
    conf.addResource(HdfsRouteTest.class.getResourceAsStream("/core-default.xml"));
    FileSystem fs1 = FileSystem.get(file.toUri(), conf);
    SequenceFile.Writer writer = createWriter(fs1, conf, file, NullWritable.class, Text.class);
    NullWritable keyWritable = NullWritable.get();
    Text valueWritable = new Text();
    String value = "CIAO!";
    valueWritable.set(value);
    writer.append(keyWritable, valueWritable);
    writer.sync();
    writer.close();

    context.addRoutes(new RouteBuilder() {
        public void configure() {
            from("hdfs:///" + file.toUri() + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0")
                    .to("mock:result");
        }
    });
    context.start();

    MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
    resultEndpoint.expectedMessageCount(1);
    resultEndpoint.assertIsSatisfied();
}

From source file:org.apache.crunch.io.text.csv.CSVLineReader.java

License:Apache License

/**
 * This method will read through one full CSV record, place its content into
 * the input Text and return the number of bytes (including newline
 * characters) that were consumed.
 * 
 * @param input
 *          a mutable {@link Text} object into which the text of the CSV
 *          record will be stored, without any line feeds or carriage returns
 * @return the number of bytes that were read, including any control
 *         characters, line feeds, or carriage returns.
 * @throws IOException
 *           if an IOException occurs while handling the file to be read
 */
public int readCSVLine(final Text input) throws IOException {
    Preconditions.checkNotNull(input, "input may not be null");
    inputText = new Text(input);
    long totalBytesConsumed = 0;
    if (endOfFile) {
        return 0;
    }
    if (inMultiLine) {
        throw new RuntimeException(
                "Cannot begin reading a CSV record while inside of a multi-line CSV record.");
    }

    final StringBuilder stringBuilder = new StringBuilder();
    do {
        // Read a line from the file and add it to the builder
        inputText.clear();
        totalBytesConsumed += readFileLine(inputText);
        stringBuilder.append(inputText.toString());

        if (currentlyInQuotes && !endOfFile) {
            // If we end up in a multi-line record, we need to append a newline
            stringBuilder.append('\n');

            // Do a check on the total bytes consumed to see if something has gone
            // wrong.
            if (totalBytesConsumed > maximumRecordSize || totalBytesConsumed > Integer.MAX_VALUE) {
                final String record = stringBuilder.toString();
                LOGGER.error("Possibly malformed file encountered. First line of record: "
                        + record.substring(0, record.indexOf('\n')));
                throw new IOException(
                        "Possibly malformed file encountered. Check log statements for more information");
            }
        }
    } while (currentlyInQuotes && !endOfFile);

    // Set the input to the multi-line record
    input.set(stringBuilder.toString());
    return (int) totalBytesConsumed;
}

From source file:org.apache.crunch.io.text.csv.CSVLineReader.java

License:Apache License

/**
 * A method for reading through one single line in the CSV file, that is, it
 * will read until the first line feed, carriage return, or set of both is
 * found. The CSV parsing logic markers are maintained outside of this method
 * to enable manipulation of that logic in order to find the beginning of a
 * CSV record. Use {@link CSVLineReader#isInMultiLine()} and
 * {@link CSVLineReader#resetMultiLine()} to do so. See
 * {@link CSVInputFormat#getSplitsForFile(long, long, org.apache.hadoop.fs.Path, org.apache.hadoop.fs.FSDataInputStream)}
 * for an example.
 * 
 * @param input
 *          a mutable {@link Text} object into which the text of the line will
 *          be stored, without any line feeds or carriage returns
 * @return the number of bytes that were read, including any control
 *         characters, line feeds, or carriage returns.
 * @throws IOException
 *           if an IOException occurs while handling the file to be read
 */
public int readFileLine(final Text input) throws IOException {
    Preconditions.checkNotNull(input, "input may not be null");
    if (endOfFile) {
        return 0;
    }

    // This integer keeps track of the number of newline characters used to
    // terminate the line being read. This could be 1, in the case of LF or CR,
    // or 2, in the case of CRLF.
    int newlineLength = 0;
    int inputTextLength = 0;
    long bytesConsumed = 0;
    int readTextLength = 0;
    int startPosition = bufferPosition;
    endOfLineReached = false;
    inputText = new Text(input);

    do {
        boolean checkForLF = false;
        // Figure out where we are in the buffer and fill it if necessary.
        if (bufferPosition >= bufferLength) {
            refillBuffer();
            startPosition = bufferPosition;
            if (endOfFile) {
                break;
            }
        }

        newlineLength = 0;
        // Iterate through the buffer looking for newline characters while keeping
        // track of if we're in a field and/or in quotes.
        for (; bufferPosition < bufferLength; ++bufferPosition) {
            bytesConsumed += calculateCharacterByteLength(buffer[bufferPosition]);
            if (buffer[bufferPosition] == this.escape) {
                if (isNextCharacterEscapable(currentlyInQuotes, bufferPosition)) {
                    // checks to see if we are in quotes and if the next character is a
                    // quote or an escape
                    // character. If so, that's fine. Record the next character's size
                    // and skip it.
                    ++bufferPosition;
                    bytesConsumed += calculateCharacterByteLength(buffer[bufferPosition]);
                }
            } else if (buffer[bufferPosition] == openQuoteChar || buffer[bufferPosition] == closeQuoteChar) {
                // toggle currentlyInQuotes if we've hit a non-escaped quote character
                currentlyInQuotes = !currentlyInQuotes;
            } else if (buffer[bufferPosition] == LF || buffer[bufferPosition] == CR) {
                boolean lastCharWasCR = buffer[bufferPosition] == CR;
                // Line is over; make a note and increment the newlineLength
                // counter.
                endOfLineReached = true;
                ++newlineLength;
                ++bufferPosition;
                if (lastCharWasCR && buffer[bufferPosition] == LF) {
                    lastCharWasCR = false;
                    // Check for LF (in case of CRLF line endings) and increment the
                    // counter, skip it by moving the buffer position, then record the
                    // length of the LF.
                    ++newlineLength;
                    ++bufferPosition;
                    bytesConsumed += calculateCharacterByteLength(buffer[bufferPosition]);
                } else if (lastCharWasCR && bufferPosition >= bufferLength) {
                    // We just read a CR at the very end of the buffer. If this is a
                    // file with CRLF line endings, there will be a LF next that we need
                    // to check for and account for in bytesRead before we count this
                    // line as "read".
                    checkForLF = true;
                }
                break;
            }
        }
        // This is the length of the actual text in the line, excluding the newline characters.
        readTextLength = bufferPosition - startPosition - newlineLength;

        // Append the results.
        if (readTextLength > Integer.MAX_VALUE - inputTextLength) {
            readTextLength = Integer.MAX_VALUE - inputTextLength;
        }
        if (readTextLength > 0) {
            // This will append the portion of the buffer containing only the
            // important text, omitting any newline characters
            inputText.set(new StringBuilder().append(inputText.toString())
                    .append(new String(buffer, startPosition, readTextLength)).toString());
            inputTextLength += readTextLength;
        }

        // If the last character we read was a CR at the end of the buffer, we
        // need to check for an LF after a buffer refill.
        if (checkForLF) {
            refillBuffer();
            if (endOfFile) {
                break;
            }
            if (buffer[bufferPosition] == LF) {
                bytesConsumed += calculateCharacterByteLength(buffer[bufferPosition]);
                ++bufferPosition;
                ++newlineLength;
            }
        }

    } while (newlineLength == 0 && bytesConsumed < Integer.MAX_VALUE);

    if (endOfLineReached) {
        // Ending the line inside quotes means the record continues on the
        // next physical line.
        inMultiLine = currentlyInQuotes;
    }

    if (bytesConsumed > Integer.MAX_VALUE) {
        throw new IOException("Too many bytes consumed before newline: " + Integer.MAX_VALUE);
    }

    input.set(inputText);
    return (int) bytesConsumed;
}

From source file:org.apache.flink.test.hadoop.mapred.HadoopIOFormatsITCase.java

License:Apache License

@Override
protected void preSubmit() throws Exception {
    resultPath = new String[] { getTempDirPath("result0"), getTempDirPath("result1") };

    File sequenceFile = createAndRegisterTempFile("seqFile");
    sequenceFileInPath = sequenceFile.toURI().toString();

    // Create a sequence file
    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    FileSystem fs = FileSystem.get(URI.create(sequenceFile.getAbsolutePath()), conf);
    Path path = new Path(sequenceFile.getAbsolutePath());

    //  ------------------ Long / Text Key Value pair: ------------
    int kvCount = 4;

    LongWritable key = new LongWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
        for (int i = 0; i < kvCount; i++) {
            if (i == 1) {
                // write key = 1 a bit more often.
                for (int a = 0; a < 15; a++) {
                    key.set(i);
                    value.set(i + " - somestring");
                    writer.append(key, value);
                }
            }
            key.set(i);
            value.set(i + " - somestring");
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }

    //  ------------------ Null / Long Key Value pair: ------------

    File sequenceFileNull = createAndRegisterTempFile("seqFileNullKey");
    sequenceFileInPathNull = sequenceFileNull.toURI().toString();
    path = new Path(sequenceFileInPathNull);

    LongWritable value1 = new LongWritable();
    SequenceFile.Writer writer1 = null;
    try {
        writer1 = SequenceFile.createWriter(fs, conf, path, NullWritable.class, value1.getClass());
        for (int i = 0; i < kvCount; i++) {
            value1.set(i);
            writer1.append(NullWritable.get(), value1);
        }
    } finally {
        IOUtils.closeStream(writer1);
    }
}
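
As a complement, a minimal hedged sketch (an assumption for illustration, not part of the Flink test) of reading such a sequence file back, assuming the same imports as the snippet above. One key and one value instance are reused across the whole file, mirroring the reuse on the write side.

static void readBack(FileSystem fs, Path path, org.apache.hadoop.conf.Configuration conf) throws IOException {
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    try {
        LongWritable key = new LongWritable();
        Text value = new Text();
        while (reader.next(key, value)) {
            // next(...) refills key and value in place, mirroring set(...) on the write side
            System.out.println(key.get() + " -> " + value);
        }
    } finally {
        IOUtils.closeStream(reader);
    }
}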

From source file:org.apache.giraph.types.ops.TextTypeOps.java

License:Apache License

@Override
public void set(Text to, Text from) {
    // Bound the copy by getLength(): getBytes() exposes the backing array,
    // which may contain stale bytes past the valid length.
    to.set(from.getBytes(), 0, from.getLength());
}

From source file:org.apache.gobblin.runtime.JobState.java

License:Apache License

public void write(DataOutput out, boolean writeTasks, boolean writePreviousWorkUnitStates) throws IOException {
    Text text = new Text();
    text.set(this.jobName);
    text.write(out);
    text.set(this.jobId);
    text.write(out);
    out.writeLong(this.startTime);
    out.writeLong(this.endTime);
    out.writeLong(this.duration);
    text.set(this.state.name());
    text.write(out);
    out.writeInt(this.taskCount);
    if (writeTasks) {
        out.writeInt(this.taskStates.size() + this.skippedTaskStates.size());
        for (TaskState taskState : this.taskStates.values()) {
            taskState.write(out);
        }
        for (TaskState taskState : this.skippedTaskStates.values()) {
            taskState.write(out);
        }
    } else {
        out.writeInt(0);
    }
    super.write(out, writePreviousWorkUnitStates);
}
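
For context, a hedged sketch of the symmetric read side (an assumption for illustration, not Gobblin's actual code), showing how the same reusable Text recovers the fields in write order:

public void readSketch(DataInput in) throws IOException {
    Text text = new Text();
    text.readFields(in);               // jobName, written via text.set(this.jobName)
    String jobName = text.toString();
    text.readFields(in);               // jobId
    String jobId = text.toString();
    long startTime = in.readLong();
    long endTime = in.readLong();
    long duration = in.readLong();
    text.readFields(in);               // state name
    String state = text.toString();
    int taskCount = in.readInt();
    // ... remaining fields mirror write(...) above
}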

From source file:org.apache.hawq.pxf.plugins.json.JsonRecordReader.java

License:Apache License

@Override
public boolean next(LongWritable key, Text value) throws IOException {

    while (pos < end) {

        String json = parser.nextObjectContainingMember(jsonMemberName);
        pos = start + parser.getBytesRead();

        if (json == null) {
            return false;
        }

        long jsonStart = pos - json.length();

        // if the "begin-object" position is after the end of our split, we should ignore it
        if (jsonStart >= end) {
            return false;
        }

        if (json.length() > maxObjectLength) {
            LOG.warn("Skipped JSON object of size " + json.length() + " at pos " + jsonStart);
        } else {
            key.set(jsonStart);
            value.set(json);
            return true;
        }
    }

    return false;
}
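
To make the contract explicit, a short hedged sketch of the loop a caller would drive (reader stands for an initialized JsonRecordReader; processRecord is a hypothetical consumer). The key and value are allocated once and refilled in place by set() on every call.

LongWritable key = new LongWritable();
Text value = new Text();
while (reader.next(key, value)) {
    // next(...) filled value in place via value.set(json)
    processRecord(key.get(), value.toString()); // processRecord is hypothetical
}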

From source file:org.apache.hive.storage.jdbc.JdbcSerDe.java

License:Apache License

@Override
public Object deserialize(Writable blob) throws SerDeException {
    LOGGER.debug("Deserializing from SerDe");
    if (!(blob instanceof MapWritable)) {
        throw new SerDeException("Expected MapWritable. Got " + blob.getClass().getName());
    }

    if ((row == null) || (columnNames == null)) {
        throw new SerDeException("JDBC SerDe hasn't been initialized properly");
    }

    row.clear();
    MapWritable input = (MapWritable) blob;
    Text columnKey = new Text();

    for (int i = 0; i < numColumns; i++) {
        columnKey.set(columnNames.get(i));
        Writable value = input.get(columnKey);
        row.add(value instanceof NullWritable ? null : ((ObjectWritable) value).get());
    }

    return row;
}
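
Note the allocation pattern here: a single columnKey Text is reused for every lookup. set() replaces its contents in place, and because Text provides value-based hashCode and equals, one mutable instance can probe the MapWritable for all columns without creating a new key object per column.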