List of usage examples for org.apache.hadoop.io.Text.getBytes()
@Override public byte[] getBytes()
From source file:es.pic.astro.hadoop.io.BinaryOutputFormat.java
License:Apache License
/** * create the final out file, and output row by row. After one row is * appended, a configured row separator is appended * * @param jc//www. j a va2s. c om * the job configuration file * @param outPath * the final output file to be created * @param valueClass * the value class used for create * @param isCompressed * whether the content is compressed or not * @param tableProperties * the tableProperties of this file's corresponding table * @param progress * progress used for status report * @return the RecordWriter */ @Override public RecordWriter getHiveRecordWriter(JobConf jc, Path outPath, Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException { FileSystem fs = outPath.getFileSystem(jc); final OutputStream outStream = Utilities.createCompressedStream(jc, fs.create(outPath, progress), isCompressed); return new RecordWriter() { @Override public void write(Writable r) throws IOException { if (r instanceof Text) { Text tr = (Text) r; outStream.write(tr.getBytes(), 0, tr.getLength()); } else { // DynamicSerDe always writes out BytesWritable BytesWritable bw = (BytesWritable) r; outStream.write(bw.get(), 0, bw.getSize()); } } @Override public void close(boolean abort) throws IOException { outStream.close(); } }; }
From source file:eu.larkc.RDFPig.io.NTriplesReader.java
License:Apache License
@Override public Tuple getNext() throws IOException { while (true) { mProtoTuple = null;//from w w w . jav a2 s .c o m try { boolean notDone = in.nextKeyValue(); if (!notDone) { return null; } Text value = null; try { value = (Text) in.getCurrentValue(); byte[] buf = value.getBytes(); int len = value.getLength(); if (len < 3) continue; // Ignore lines with less than 3 bytes //Get rid of any trailing whitespace while (Character.isWhitespace(buf[len - 1])) len--; if (buf[len - 1] != '.') continue;//throw new ExecException("Could not parse triple, no trailing \'.\': " + value); else len--; //Get rid of any trailing whitespace while (Character.isWhitespace(buf[len - 1])) len--; int start = 0; while (Character.isWhitespace(buf[start])) start++; // Parse subject boolean isURI = buf[0] == '<'; for (int i = 0; i < len; i++) { if (isURI && buf[i] == '>') { readField(buf, start, i + 1); start = i + 1; break; } else if (Character.isWhitespace(buf[i])) { readField(buf, start, i); start = i + 1; break; } } while (Character.isWhitespace(buf[start])) start++; // Parse predicate (always URI) for (int i = start; i < len; i++) { if (buf[i] == '>') { readField(buf, start, i + 1); start = i + 1; break; } } while (Character.isWhitespace(buf[start])) start++; // Parse object if (buf[start] == '<') //URI for (int i = start + 1; i < len; i++) { if (buf[i] == '>') { readField(buf, start, i + 1); start = i + 1; break; } } else if (buf[start] == '"') //Literal for (int i = start + 1; i < len; i++) { if (buf[i] == '"' && i > 0 && buf[i - 1] != '\\') { readField(buf, start, i + 1); start = i + 1; break; } } else if (buf[start] == '_') {//BNode int i = start + 1; for (; i < len; i++) { if (Character.isWhitespace(buf[i])) { readField(buf, start, i); start = i + 1; break; } } // We are at end of line, read it readField(buf, start, i); } else continue;//throw new ExecException("Could not parse triple, invalid term in object position: " + value); // After the first three terms, the rest are ignored 
if (mProtoTuple.size() != 3) continue; Tuple t = mTupleFactory.newTupleNoCopy(mProtoTuple); mProtoTuple = null; return t; } catch (Exception e) { e.printStackTrace(); System.err.println("For line: " + value); mProtoTuple = null; } } catch (Exception e) { int errCode = 6018; String errMsg = "Error while reading input"; throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT, e); } } }
From source file:ezbake.protect.test.security.AutoSecurityTest.java
License:Apache License
/**
 * Writes {@code value} into {@code REG_TABLE} under the row, column family and column
 * qualifier taken from the key of {@code row}, using the column visibility {@code "U"}.
 *
 * @param row
 *            entry whose key supplies the row id, column family and column qualifier
 * @param value
 *            the cell content; only its first {@code getLength()} bytes are stored
 * @throws RegistrationException
 *             if the connector cannot be obtained, the table does not exist, or the
 *             mutation is rejected (including on close of the writer)
 */
protected void writeRow(Map.Entry<Key, Value> row, Text value) throws RegistrationException {
    Connector connector = null;
    BatchWriter writer = null;
    try {
        connector = new AccumuloHelper(configuration).getConnector();
        writer = connector.createBatchWriter(REG_TABLE, 1000000L, 1000L, 10);
        Mutation m = new Mutation(row.getKey().getRow());
        // Text.getBytes() returns the backing array, which can be longer than the logical
        // content; copy only the valid range so no stale trailing bytes are persisted.
        m.put(row.getKey().getColumnFamily(), row.getKey().getColumnQualifier(), new ColumnVisibility("U"),
                new Value(value.getBytes(), 0, value.getLength()));
        writer.addMutation(m);
    } catch (IOException e) {
        throw new RegistrationException("Error: IOException " + e);
    } catch (TableNotFoundException e) {
        throw new RegistrationException("Error: Accumulo Misconfigured - table is not found " + e);
    } catch (MutationsRejectedException e) {
        throw new RegistrationException("Error: Mutation Rejected " + e);
    } finally {
        // Always release the writer, even when the write above failed.
        if (writer != null) {
            try {
                writer.close();
            } catch (MutationsRejectedException e) {
                throw new RegistrationException("Error: Mutation Rejected " + e);
            }
        }
    }
}
From source file:ezbake.security.persistence.impl.AccumuloRegistrationManager.java
License:Apache License
protected void writeRow(Map.Entry<Key, Value> row, Text value) throws RegistrationException { try {//from w w w . j ava 2s.c om Connector connector = new AccumuloHelper(configuration).getConnector(); BatchWriter writer = connector.createBatchWriter(REG_TABLE, 1000000L, 1000L, 10); Mutation m = new Mutation(row.getKey().getRow()); m.put(row.getKey().getColumnFamily(), row.getKey().getColumnQualifier(), new ColumnVisibility(visibilityToken), new Value(value.getBytes())); writer.addMutation(m); writer.close(); } catch (IOException e) { throw new RegistrationException("Error: IOException " + e); } catch (TableNotFoundException e) { throw new RegistrationException("Error: Accumulo Misconfigured - table is not found " + e); } catch (MutationsRejectedException e) { throw new RegistrationException("Error: Mutation Rejected " + e); } }
From source file:fi.tkk.ics.hadoop.bam.cli.plugins.chipster.SummarySort.java
License:Open Source License
@Override public boolean nextKeyValue() throws IOException, CharacterCodingException { if (!lineRR.nextKeyValue()) return false; Text line = getCurrentValue(); int tabOne = line.find("\t"); int rid = Integer.parseInt(Text.decode(line.getBytes(), 0, tabOne)); int tabTwo = line.find("\t", tabOne + 1); int posBeg = tabOne + 1; int posEnd = tabTwo - 1; int pos = Integer.parseInt(Text.decode(line.getBytes(), posBeg, posEnd - posBeg + 1)); key.set(BAMRecordReader.getKey0(rid, pos)); return true;// w w w. j a v a 2 s . c o m }
From source file:fi.tkk.ics.hadoop.bam.SequencedFragment.java
License:Open Source License
/**
 * Convert quality scores in-place.
 *
 * <p>Only the first {@code quality.getLength()} bytes of the backing array are converted.
 * Only Illumina to Sanger and Sanger to Illumina conversions are supported. If an
 * out-of-range score is found, the bytes before it have already been converted when the
 * exception is thrown.
 *
 * @param quality
 *            the quality string to convert; its bytes are modified in place
 * @param current
 *            the encoding the scores are currently in
 * @param target
 *            the encoding to convert the scores to
 * @throws FormatException
 *             if quality scores are out of the range allowed by the current encoding.
 * @throws IllegalArgumentException
 *             if current and target quality encodings are the same, or the requested
 *             transformation is not supported.
 */
public static void convertQuality(Text quality, BaseQualityEncoding current, BaseQualityEncoding target) {
    if (current == target) {
        throw new IllegalArgumentException(
                "current and target quality encodings are the same (" + current + ")");
    }

    byte[] bytes = quality.getBytes();
    final int len = quality.getLength();
    // Amount by which an Illumina-encoded byte exceeds the Sanger-encoded byte of the same score.
    final int illuminaSangerDistance = FormatConstants.ILLUMINA_OFFSET - FormatConstants.SANGER_OFFSET;

    if (current == BaseQualityEncoding.Illumina && target == BaseQualityEncoding.Sanger) {
        for (int i = 0; i < len; ++i) {
            if (bytes[i] < FormatConstants.ILLUMINA_OFFSET
                    || bytes[i] > (FormatConstants.ILLUMINA_OFFSET + FormatConstants.ILLUMINA_MAX)) {
                throw new FormatException("base quality score out of range for Illumina Phred+64 format (found "
                        + (bytes[i] - FormatConstants.ILLUMINA_OFFSET) + " but acceptable range is [0,"
                        + FormatConstants.ILLUMINA_MAX + "]).\n"
                        + "Maybe qualities are encoded in Sanger format?\n");
            }
            bytes[i] -= illuminaSangerDistance;
        }
    } else if (current == BaseQualityEncoding.Sanger && target == BaseQualityEncoding.Illumina) {
        for (int i = 0; i < len; ++i) {
            if (bytes[i] < FormatConstants.SANGER_OFFSET
                    || bytes[i] > (FormatConstants.SANGER_OFFSET + FormatConstants.SANGER_MAX)) {
                // Sanger scores are offset by 33, not 64.
                throw new FormatException("base quality score out of range for Sanger Phred+33 format (found "
                        + (bytes[i] - FormatConstants.SANGER_OFFSET) + " but acceptable range is [0,"
                        + FormatConstants.SANGER_MAX + "]).\n"
                        + "Maybe qualities are encoded in Illumina format?\n");
            }
            bytes[i] += illuminaSangerDistance;
        }
    } else {
        throw new IllegalArgumentException(
                "unsupported BaseQualityEncoding transformation from " + current + " to " + target);
    }
}
From source file:fi.tkk.ics.hadoop.bam.SequencedFragment.java
License:Open Source License
/** * Verify that the given quality bytes are within the range allowed for the specified encoding. * * In theory, the Sanger encoding uses the entire * range of characters from ASCII 33 to 126, giving a value range of [0,93]. However, values over 60 are * unlikely in practice, and are more likely to be caused by mistaking a file that uses Illumina encoding * for Sanger. So, we'll enforce the same range supported by Illumina encoding ([0,62]) for Sanger. * * @return -1 if quality is ok.// w ww. j a v a2 s . co m * @return If an out-of-range value is found the index of the value is returned. */ public static int verifyQuality(Text quality, BaseQualityEncoding encoding) { // set allowed quality range int max, min; if (encoding == BaseQualityEncoding.Illumina) { max = FormatConstants.ILLUMINA_OFFSET + FormatConstants.ILLUMINA_MAX; min = FormatConstants.ILLUMINA_OFFSET; } else if (encoding == BaseQualityEncoding.Sanger) { max = FormatConstants.SANGER_OFFSET + FormatConstants.SANGER_MAX; min = FormatConstants.SANGER_OFFSET; } else throw new IllegalArgumentException("Unsupported base encoding quality " + encoding); // verify final byte[] bytes = quality.getBytes(); final int len = quality.getLength(); for (int i = 0; i < len; ++i) { if (bytes[i] < min || bytes[i] > max) return i; } return -1; }
From source file:fm.last.darling.hbase.HBaseJSONOutputReader.java
License:Apache License
/**
 * Returns a copy of the content of {@code text} without its first and last byte.
 *
 * @param text
 *            the source text; only its first {@code getLength()} bytes are considered
 * @return a new array of {@code getLength() - 2} bytes holding the inner content
 */
private byte[] trimOuterBytes(Text text) {
    final int innerLength = text.getLength() - 2;
    final byte[] inner = new byte[innerLength];
    // Start at offset 1 to drop the first byte; the shorter length drops the last one.
    System.arraycopy(text.getBytes(), 1, inner, 0, innerLength);
    return inner;
}
From source file:fr.ens.biologie.genomique.eoulsan.bio.io.hadoop.ExpressionRecordWriter.java
License:LGPL
@Override public synchronized void write(final Text key, final LongWritable value) throws IOException, InterruptedException { this.context.getCounter(COUNTERS_GROUP, INPUT_ENTRIES).increment(1); if (value == null) { return;//w w w . j a va 2 s .c o m } this.out.write(key.getBytes(), 0, key.getLength()); this.out.write(separator); this.out.write(value.toString().getBytes(StandardCharsets.UTF_8)); this.out.write(newline); this.context.getCounter(COUNTERS_GROUP, ENTRIES_WRITTEN).increment(1); }
From source file:fr.ens.biologie.genomique.eoulsan.bio.io.hadoop.SAMRecordWriter.java
License:LGPL
/**
 * Writes the bytes of {@code value} followed by a newline; the key is not written.
 *
 * <p>The input-entries counter is incremented for every call; entries with a {@code null}
 * value are counted as input but not written.
 *
 * @param key
 *            the key (ignored)
 * @param value
 *            the record to write; only its first {@code getLength()} bytes are written;
 *            may be {@code null}
 * @throws IOException
 *             if writing to the output fails
 * @throws InterruptedException
 *             declared by the overridden method
 */
@Override
public synchronized void write(final Text key, final Text value)
        throws IOException, InterruptedException {

    this.context.getCounter(COUNTERS_GROUP, INPUT_ENTRIES).increment(1);

    if (value != null) {
        // Record content, limited to the valid part of the backing array.
        final byte[] content = value.getBytes();
        final int contentLength = value.getLength();
        this.out.write(content, 0, contentLength);
        this.out.write(newline);

        this.context.getCounter(COUNTERS_GROUP, ENTRIES_WRITTEN).increment(1);
    }
}