List of usage examples for org.apache.hadoop.io.Text#set
public void set(Text other)
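Before the per-project examples, here is a minimal self-contained sketch of set(Text) and its sibling overloads (illustrative names; not drawn from any of the source files below):

import org.apache.hadoop.io.Text;

public class TextSetExample {
    public static void main(String[] args) {
        Text source = new Text("hello");
        Text target = new Text();

        target.set(source);                                    // copy the contents of another Text
        target.set("world");                                   // overload: set from a String (UTF-8 encoded)
        target.set(source.getBytes(), 0, source.getLength());  // overload: copy a byte range

        System.out.println(target);                            // prints "hello"
    }
}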
From source file:org.apache.camel.component.hdfs2.HdfsConsumerTest.java
License:Apache License
@Test
public void testReadString() throws Exception {
    if (!canTest()) {
        return;
    }
    final Path file = new Path(new File("target/test/test-camel-string").getAbsolutePath());
    Configuration conf = new Configuration();
    SequenceFile.Writer writer = createWriter(conf, file, NullWritable.class, Text.class);
    NullWritable keyWritable = NullWritable.get();
    Text valueWritable = new Text();
    String value = "CIAO!";
    valueWritable.set(value);
    writer.append(keyWritable, valueWritable);
    writer.sync();
    writer.close();

    MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
    resultEndpoint.expectedMessageCount(1);

    context.addRoutes(new RouteBuilder() {
        public void configure() {
            from("hdfs2:///" + file.toUri() + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0")
                    .to("mock:result");
        }
    });
    context.start();

    resultEndpoint.assertIsSatisfied();
}
From source file:org.apache.camel.component.hdfs2.HdfsConsumerTest.java
License:Apache License
@Test
public void testReadStringArrayFile() throws Exception {
    if (!canTest()) {
        return;
    }
    final Path file = new Path(new File("target/test/test-camel-string").getAbsolutePath());
    Configuration conf = new Configuration();
    FileSystem fs1 = FileSystem.get(file.toUri(), conf);
    ArrayFile.Writer writer = new ArrayFile.Writer(conf, fs1, "target/test/test-camel-string1", Text.class,
            CompressionType.NONE, new Progressable() {
                @Override
                public void progress() {
                }
            });
    Text valueWritable = new Text();
    String value = "CIAO!";
    valueWritable.set(value);
    writer.append(valueWritable);
    writer.close();

    MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
    resultEndpoint.expectedMessageCount(1);

    context.addRoutes(new RouteBuilder() {
        public void configure() {
            from("hdfs2:///" + file.getParent().toUri()
                    + "?fileSystemType=LOCAL&fileType=ARRAY_FILE&initialDelay=0").to("mock:result");
        }
    });
    context.start();

    resultEndpoint.assertIsSatisfied();
}
From source file:org.apache.camel.itest.osgi.hdfs.HdfsRouteTest.java
License:Apache License
@Test
public void testReadString() throws Exception {
    if (SKIP) {
        return;
    }
    final Path file = new Path(new File("../../../../target/test/test-camel-string").getAbsolutePath());
    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    //conf.setClassLoader(this.getClass().getClassLoader());
    // add the default configuration from the resource
    conf.addResource(HdfsRouteTest.class.getResourceAsStream("/core-default.xml"));
    FileSystem fs1 = FileSystem.get(file.toUri(), conf);
    SequenceFile.Writer writer = createWriter(fs1, conf, file, NullWritable.class, Text.class);
    NullWritable keyWritable = NullWritable.get();
    Text valueWritable = new Text();
    String value = "CIAO!";
    valueWritable.set(value);
    writer.append(keyWritable, valueWritable);
    writer.sync();
    writer.close();

    context.addRoutes(new RouteBuilder() {
        public void configure() {
            from("hdfs:///" + file.toUri() + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0")
                    .to("mock:result");
        }
    });
    context.start();

    MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
    resultEndpoint.expectedMessageCount(1);
    resultEndpoint.assertIsSatisfied();
}
From source file:org.apache.crunch.io.text.csv.CSVLineReader.java
License:Apache License
/**
 * This method will read through one full CSV record, place its content into
 * the input Text and return the number of bytes (including newline
 * characters) that were consumed.
 *
 * @param input
 *          a mutable {@link Text} object into which the text of the CSV
 *          record will be stored, without any line feeds or carriage returns
 * @return the number of bytes that were read, including any control
 *         characters, line feeds, or carriage returns.
 * @throws IOException
 *           if an IOException occurs while handling the file to be read
 */
public int readCSVLine(final Text input) throws IOException {
    Preconditions.checkNotNull(input, "inputText may not be null");
    inputText = new Text(input);
    long totalBytesConsumed = 0;
    if (endOfFile) {
        return 0;
    }
    if (inMultiLine) {
        throw new RuntimeException("Cannot begin reading a CSV record while inside of a multi-line CSV record.");
    }
    final StringBuilder stringBuilder = new StringBuilder();
    do {
        // Read a line from the file and add it to the builder
        inputText.clear();
        totalBytesConsumed += readFileLine(inputText);
        stringBuilder.append(inputText.toString());
        if (currentlyInQuotes && !endOfFile) {
            // If we end up in a multi-line record, we need to append a newline
            stringBuilder.append('\n');
            // Do a check on the total bytes consumed to see if something has gone wrong.
            if (totalBytesConsumed > maximumRecordSize || totalBytesConsumed > Integer.MAX_VALUE) {
                final String record = stringBuilder.toString();
                LOGGER.error("Possibly malformed file encountered. First line of record: "
                        + record.substring(0, record.indexOf('\n')));
                throw new IOException("Possibly malformed file encountered. Check log statements for more information");
            }
        }
    } while (currentlyInQuotes && !endOfFile);
    // Set the input to the multi-line record
    input.set(stringBuilder.toString());
    return (int) totalBytesConsumed;
}
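A hedged sketch of how readCSVLine is typically driven (it assumes a reader has already been constructed; the loop shape and the process(...) helper are illustrative, not taken from Crunch):

Text record = new Text();
int bytesRead;
while ((bytesRead = reader.readCSVLine(record)) > 0) {
    // record now holds one full CSV record, possibly spanning multiple
    // physical lines; readCSVLine hands it back via input.set(String).
    process(record.toString()); // process(...) is a placeholder
}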
From source file:org.apache.crunch.io.text.csv.CSVLineReader.java
License:Apache License
/**
 * A method for reading through one single line in the CSV file, that is, it
 * will read until the first line feed, carriage return, or set of both is
 * found. The CSV parsing logic markers are maintained outside of this method
 * to enable the manipulation of that logic in order to find the beginning of
 * a CSV record. Use {@link CSVLineReader#isInMultiLine()} and
 * {@link CSVLineReader#resetMultiLine()} to do so. See
 * {@link CSVInputFormat#getSplitsForFile(long, long, org.apache.hadoop.fs.Path, org.apache.hadoop.fs.FSDataInputStream)}
 * for an example.
 *
 * @param input
 *          a mutable {@link Text} object into which the text of the line will
 *          be stored, without any line feeds or carriage returns
 * @return the number of bytes that were read, including any control
 *         characters, line feeds, or carriage returns.
 * @throws IOException
 *           if an IOException occurs while handling the file to be read
 */
public int readFileLine(final Text input) throws IOException {
    Preconditions.checkNotNull(input, "inputText may not be null");
    if (endOfFile) {
        return 0;
    }

    // This integer keeps track of the number of newline characters used to
    // terminate the line being read. This could be 1, in the case of LF or CR,
    // or 2, in the case of CRLF.
    int newlineLength = 0;

    int inputTextLength = 0;
    long bytesConsumed = 0;
    int readTextLength = 0;
    int startPosition = bufferPosition;
    endOfLineReached = false;
    inputText = new Text(input);

    do {
        boolean checkForLF = false;
        // Figure out where we are in the buffer and fill it if necessary.
        if (bufferPosition >= bufferLength) {
            refillBuffer();
            startPosition = bufferPosition;
            if (endOfFile) {
                break;
            }
        }
        newlineLength = 0;
        // Iterate through the buffer looking for newline characters while keeping
        // track of if we're in a field and/or in quotes.
        for (; bufferPosition < bufferLength; ++bufferPosition) {
            bytesConsumed += calculateCharacterByteLength(buffer[bufferPosition]);
            if (buffer[bufferPosition] == this.escape) {
                if (isNextCharacterEscapable(currentlyInQuotes, bufferPosition)) {
                    // Checks to see if we are in quotes and if the next character is a
                    // quote or an escape character. If so, that's fine. Record the
                    // next character's size and skip it.
                    ++bufferPosition;
                    bytesConsumed += calculateCharacterByteLength(buffer[bufferPosition]);
                }
            } else if (buffer[bufferPosition] == openQuoteChar || buffer[bufferPosition] == closeQuoteChar) {
                // Toggle currentlyInQuotes if we've hit a non-escaped quote character
                currentlyInQuotes = !currentlyInQuotes;
            } else if (buffer[bufferPosition] == LF || buffer[bufferPosition] == CR) {
                boolean lastCharWasCR = buffer[bufferPosition] == CR;
                // Line is over, make note and increment the newlineLength counter.
                endOfLineReached = true;
                ++newlineLength;
                ++bufferPosition;
                if (lastCharWasCR && buffer[bufferPosition] == LF) {
                    lastCharWasCR = false;
                    // Check for LF (in case of CRLF line endings) and increment the
                    // counter, skip it by moving the buffer position, then record the
                    // length of the LF.
                    ++newlineLength;
                    ++bufferPosition;
                    bytesConsumed += calculateCharacterByteLength(buffer[bufferPosition]);
                } else if (lastCharWasCR && bufferPosition >= bufferLength) {
                    // We just read a CR at the very end of the buffer. If this is a
                    // file with CRLF line endings, there will be a LF next that we need
                    // to check for and account for in bytesRead before we count this
                    // line as "read".
                    checkForLF = true;
                }
                break;
            }
        }

        // This is the length of the actual text and important stuff in the line.
        readTextLength = bufferPosition - startPosition - newlineLength;

        // Append the results.
        if (readTextLength > Integer.MAX_VALUE - inputTextLength) {
            readTextLength = Integer.MAX_VALUE - inputTextLength;
        }
        if (readTextLength > 0) {
            // This will append the portion of the buffer containing only the
            // important text, omitting any newline characters
            inputText.set(new StringBuilder().append(inputText.toString())
                    .append(new String(buffer, startPosition, readTextLength)).toString());
            inputTextLength += readTextLength;
        }

        // If the last character we read was a CR at the end of the buffer, we
        // need to check for an LF after a buffer refill.
        if (checkForLF) {
            refillBuffer();
            if (endOfFile) {
                break;
            }
            if (buffer[bufferPosition] == LF) {
                bytesConsumed += calculateCharacterByteLength(buffer[bufferPosition]);
                ++bufferPosition;
                ++newlineLength;
            }
        }
    } while (newlineLength == 0 && bytesConsumed < Integer.MAX_VALUE);

    if (endOfLineReached) {
        inMultiLine = currentlyInQuotes;
    }

    if (bytesConsumed > Integer.MAX_VALUE) {
        throw new IOException("Too many bytes consumed before newline: " + Integer.MAX_VALUE);
    }

    input.set(inputText);
    return (int) bytesConsumed;
}
From source file:org.apache.flink.test.hadoop.mapred.HadoopIOFormatsITCase.java
License:Apache License
@Override
protected void preSubmit() throws Exception {
    resultPath = new String[] { getTempDirPath("result0"), getTempDirPath("result1") };
    File sequenceFile = createAndRegisterTempFile("seqFile");
    sequenceFileInPath = sequenceFile.toURI().toString();

    // Create a sequence file
    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    FileSystem fs = FileSystem.get(URI.create(sequenceFile.getAbsolutePath()), conf);
    Path path = new Path(sequenceFile.getAbsolutePath());

    // ------------------ Long / Text key-value pairs: ------------
    int kvCount = 4;

    LongWritable key = new LongWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
        for (int i = 0; i < kvCount; i++) {
            if (i == 1) {
                // write key = 1 a bit more often.
                for (int a = 0; a < 15; a++) {
                    key.set(i);
                    value.set(i + " - somestring");
                    writer.append(key, value);
                }
            }
            key.set(i);
            value.set(i + " - somestring");
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }

    // ------------------ NullWritable / Long key-value pairs: ------------
    File sequenceFileNull = createAndRegisterTempFile("seqFileNullKey");
    sequenceFileInPathNull = sequenceFileNull.toURI().toString();
    path = new Path(sequenceFileInPathNull);

    LongWritable value1 = new LongWritable();
    SequenceFile.Writer writer1 = null;
    try {
        writer1 = SequenceFile.createWriter(fs, conf, path, NullWritable.class, value1.getClass());
        for (int i = 0; i < kvCount; i++) {
            value1.set(i);
            writer1.append(NullWritable.get(), value1);
        }
    } finally {
        IOUtils.closeStream(writer1);
    }
}
From source file:org.apache.giraph.types.ops.TextTypeOps.java
License:Apache License
@Override
public void set(Text to, Text from) {
    // Copy only the valid bytes: getBytes() returns the whole backing array,
    // which may be longer than the logical contents (see getLength()).
    to.set(from.getBytes(), 0, from.getLength());
}
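The length-bounded overload matters because Text keeps a growable backing array that is never trimmed by set(). A standalone sketch of the pitfall (illustrative names; not from the Giraph source):

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.Text;

public class TextBackingArrayPitfall {
    public static void main(String[] args) {
        Text t = new Text("a fairly long initial value");
        t.set("hi"); // shrinks the logical length, not the backing array

        byte[] raw = t.getBytes(); // the full backing array, possibly padded
        String wrong = new String(raw, StandardCharsets.UTF_8);
        String right = new String(raw, 0, t.getLength(), StandardCharsets.UTF_8);

        System.out.println(wrong.length()); // longer than 2: includes stale bytes
        System.out.println(right);          // "hi"
    }
}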
From source file:org.apache.gobblin.runtime.JobState.java
License:Apache License
public void write(DataOutput out, boolean writeTasks, boolean writePreviousWorkUnitStates) throws IOException {
    Text text = new Text();
    text.set(this.jobName);
    text.write(out);
    text.set(this.jobId);
    text.write(out);
    out.writeLong(this.startTime);
    out.writeLong(this.endTime);
    out.writeLong(this.duration);
    text.set(this.state.name());
    text.write(out);
    out.writeInt(this.taskCount);
    if (writeTasks) {
        out.writeInt(this.taskStates.size() + this.skippedTaskStates.size());
        for (TaskState taskState : this.taskStates.values()) {
            taskState.write(out);
        }
        for (TaskState taskState : this.skippedTaskStates.values()) {
            taskState.write(out);
        }
    } else {
        out.writeInt(0);
    }
    super.write(out, writePreviousWorkUnitStates);
}
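For context, a hedged sketch of what the symmetric read side could look like: reuse one Text and call readFields in the same order the fields were written. This is not quoted from JobState; the method name and the RunningState enum are assumptions.

// Hypothetical read-side counterpart (assumed names; not quoted from the source):
public void read(DataInput in) throws IOException {
    Text text = new Text();
    text.readFields(in);
    this.jobName = text.toString();
    text.readFields(in);
    this.jobId = text.toString();
    this.startTime = in.readLong();
    this.endTime = in.readLong();
    this.duration = in.readLong();
    text.readFields(in);
    this.state = RunningState.valueOf(text.toString()); // RunningState is an assumed enum name
    this.taskCount = in.readInt();
    // ... task states and superclass fields follow, mirroring write()
}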
From source file:org.apache.hawq.pxf.plugins.json.JsonRecordReader.java
License:Apache License
@Override
public boolean next(LongWritable key, Text value) throws IOException {

    while (pos < end) {
        String json = parser.nextObjectContainingMember(jsonMemberName);
        pos = start + parser.getBytesRead();
        if (json == null) {
            return false;
        }

        long jsonStart = pos - json.length();

        // if the "begin-object" position is after the end of our split, we should ignore it
        if (jsonStart >= end) {
            return false;
        }

        if (json.length() > maxObjectLength) {
            LOG.warn("Skipped JSON object of size " + json.length() + " at pos " + jsonStart);
        } else {
            key.set(jsonStart);
            value.set(json);
            return true;
        }
    }

    return false;
}
From source file:org.apache.hive.storage.jdbc.JdbcSerDe.java
License:Apache License
@Override
public Object deserialize(Writable blob) throws SerDeException {
    LOGGER.debug("Deserializing from SerDe");
    if (!(blob instanceof MapWritable)) {
        throw new SerDeException("Expected MapWritable. Got " + blob.getClass().getName());
    }

    if ((row == null) || (columnNames == null)) {
        throw new SerDeException("JDBC SerDe hasn't been initialized properly");
    }

    row.clear();
    MapWritable input = (MapWritable) blob;
    Text columnKey = new Text();

    for (int i = 0; i < numColumns; i++) {
        columnKey.set(columnNames.get(i));
        Writable value = input.get(columnKey);
        row.add(value instanceof NullWritable ? null : ((ObjectWritable) value).get());
    }
    return row;
}
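The reused-key pattern above works because Text hashes and compares on its byte contents, so a single instance can probe the map repeatedly. A small standalone sketch (illustrative names, not from the SerDe):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class ReusedTextKeyLookup {
    public static void main(String[] args) {
        MapWritable map = new MapWritable();
        map.put(new Text("id"), new IntWritable(42));
        map.put(new Text("name"), new Text("alice"));

        Text key = new Text(); // one instance, reset before each probe
        key.set("id");
        Writable v = map.get(key); // equals()/hashCode() use the byte contents
        System.out.println(v);     // 42
    }
}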