List of usage examples for org.apache.hadoop.io.Text#getBytes()
@Override public byte[] getBytes()
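All of the examples below revolve around one contract: getBytes() returns Text's internal backing array without trimming it, so bytes past getLength() are stale and must be ignored. A minimal sketch of the safe pattern (class and variable names are illustrative):

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.apache.hadoop.io.Text;

public class TextGetBytesDemo {
    public static void main(String[] args) {
        Text t = new Text("a longer initial value");
        t.set("short"); // the backing array keeps its old capacity

        byte[] raw = t.getBytes();                        // backing buffer, not trimmed
        byte[] exact = Arrays.copyOf(raw, t.getLength()); // only the valid UTF-8 prefix

        System.out.println(raw.length);                   // may well be > 5
        System.out.println(new String(exact, StandardCharsets.UTF_8)); // prints "short"
    }
}

Pairing getBytes() with getLength(), as every example below does, avoids a copy; trimming with Arrays.copyOf is only needed when the bytes must outlive the (mutable, often reused) Text instance.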
From source file: org.elasticsearch.hadoop.mr.WritableValueWriter.java
License: Apache License

@Override
@SuppressWarnings({ "unchecked", "deprecation" })
public Result write(Writable writable, Generator generator) {
    if (writable == null || writable instanceof NullWritable) {
        generator.writeNull();
    } else if (writable instanceof Text) {
        Text text = (Text) writable;
        generator.writeUTF8String(text.getBytes(), 0, text.getLength());
    } else if (writable instanceof UTF8) {
        UTF8 utf8 = (UTF8) writable;
        generator.writeUTF8String(utf8.getBytes(), 0, utf8.getLength());
    } else if (WritableCompatUtil.isShortWritable(writable)) {
        generator.writeNumber(WritableCompatUtil.unwrap(writable));
    } else if (writable instanceof IntWritable) {
        generator.writeNumber(((IntWritable) writable).get());
    } else if (writable instanceof LongWritable) {
        generator.writeNumber(((LongWritable) writable).get());
    } else if (writable instanceof VLongWritable) {
        generator.writeNumber(((VLongWritable) writable).get());
    } else if (writable instanceof VIntWritable) {
        generator.writeNumber(((VIntWritable) writable).get());
    } else if (writable instanceof ByteWritable) {
        generator.writeNumber(((ByteWritable) writable).get());
    } else if (writable instanceof DoubleWritable) {
        generator.writeNumber(((DoubleWritable) writable).get());
    } else if (writable instanceof FloatWritable) {
        generator.writeNumber(((FloatWritable) writable).get());
    } else if (writable instanceof BooleanWritable) {
        generator.writeBoolean(((BooleanWritable) writable).get());
    } else if (writable instanceof BytesWritable) {
        BytesWritable bw = (BytesWritable) writable;
        generator.writeBinary(bw.getBytes(), 0, bw.getLength());
    } else if (writable instanceof MD5Hash) {
        generator.writeString(writable.toString());
    } else if (writable instanceof ArrayWritable) {
        generator.writeBeginArray();
        for (Writable wrt : ((ArrayWritable) writable).get()) {
            Result result = write(wrt, generator);
            if (!result.isSuccesful()) {
                return result;
            }
        }
        generator.writeEndArray();
    } else if (writable instanceof AbstractMapWritable) {
        Map<Writable, Writable> map = (Map<Writable, Writable>) writable;
        generator.writeBeginObject();
        // ignore handling sets (which are just maps with null values)
        for (Entry<Writable, Writable> entry : map.entrySet()) {
            String fieldName = entry.getKey().toString();
            if (shouldKeep(generator.getParentPath(), fieldName)) {
                generator.writeFieldName(fieldName);
                Result result = write(entry.getValue(), generator);
                if (!result.isSuccesful()) {
                    return result;
                }
            }
        }
        generator.writeEndObject();
    } else {
        if (writeUnknownTypes) {
            return handleUnknown(writable, generator);
        }
        return Result.FAILED(writable);
    }
    return Result.SUCCESFUL();
}
From source file: org.exoplatform.LogLoader.java
License: Open Source License

/**
 * {@inheritDoc}
 */
@Override
public Tuple getNext() throws IOException {
    try {
        if (!in.nextKeyValue()) {
            return null;
        }
        Text value = (Text) in.getCurrentValue();
        DataContext context = new DataContext(value.getBytes());
        Tuple tuple = parseData(context);
        return tuple;
    } catch (InterruptedException e) {
        throw new ExecException(e.getMessage(), e);
    }
}
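This example passes the raw backing array straight to DataContext. If DataContext does not consult getLength() on its own, stale bytes past the valid length can leak into the parse; a defensive variant (assuming DataContext accepts any byte[]) would trim first:

byte[] exact = java.util.Arrays.copyOf(value.getBytes(), value.getLength());
DataContext context = new DataContext(exact);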
From source file: org.gestore.hadoop.LongRecordReader.java
License: Apache License

/**
 * Gets one complete entry
 */
private int getEntry(Pattern matcherStart, Pattern matcherStop) throws IOException {
    boolean started = false;
    boolean done = false;
    ByteBuffer newLine = ByteBuffer.allocate(2);
    newLine.putChar('\n');
    byte[] newLineBytes = newLine.array();
    Text tempLine = new Text();
    int totalRead = 0;
    int newRead = 0;
    // Discard lines before start record match, save first line that matches regex
    while (!started) {
        if (lastLine.getLength() <= 0) {
            newRead = in.readLine(tempLine, maxLineLength,
                    Math.max((int) Math.min(Integer.MAX_VALUE, end - pos), maxLineLength));
        } else {
            tempLine = lastLine;
            newRead = lastLine.getLength();
            lastLine = new Text();
        }
        if (newRead == 0) {
            return 0;
        }
        totalRead += newRead;
        Matcher m = matcherStart.matcher(tempLine.toString());
        if (m.matches()) {
            started = true;
            tempLine.append(newLineBytes, 0, newLineBytes.length);
            value.append(tempLine.getBytes(), 0, tempLine.getLength());
            break;
        }
    }
    // Save lines until end record match, save last line
    while (!done) {
        newRead = in.readLine(tempLine, maxLineLength,
                Math.max((int) Math.min(Integer.MAX_VALUE, end - pos), maxLineLength));
        if (newRead == 0) {
            return totalRead;
        }
        totalRead += newRead;
        Matcher m = matcherStop.matcher(tempLine.toString());
        if (m.matches()) {
            done = true;
            lastLine = tempLine;
            return totalRead -= newRead;
        }
        tempLine.append(newLineBytes, 0, newLineBytes.length);
        value.append(tempLine.getBytes(), 0, tempLine.getLength());
    }
    return totalRead;
}
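One quirk worth flagging: ByteBuffer.putChar writes a char as two bytes (UTF-16), so newLineBytes here is {0x00, 0x0A} and every appended line separator carries a leading NUL byte. If a plain one-byte '\n' is what was intended, a sketch of the fix:

byte[] newLineBytes = "\n".getBytes(java.nio.charset.StandardCharsets.UTF_8); // just {0x0A}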
From source file: org.hypertable.hadoop.mapred.HypertableRecordWriter.java
License: Open Source License

/**
 * Write data to HT
 */
public void write(Text key, Text value) throws IOException {
    try {
        key.append(tab, 0, tab.length);
        m_line.clear();
        m_line.append(key.getBytes(), 0, key.getLength());
        m_line.append(value.getBytes(), 0, value.getLength());
        int len = m_line.getLength();
        int tab_count = 0;
        int tab_pos = 0;
        int found = 0;
        while (found != -1) {
            found = m_line.find(tab_str, found + 1);
            if (found > 0) {
                tab_count++;
                if (tab_count == 1)
                    tab_pos = found;
            }
        }
        boolean has_timestamp;
        if (tab_count >= 3) {
            has_timestamp = true;
        } else if (tab_count == 2) {
            has_timestamp = false;
        } else {
            throw new Exception("incorrect output line format only " + tab_count + " tabs");
        }
        byte[] byte_array = m_line.getBytes();
        int row_offset, row_length;
        int family_offset = 0, family_length = 0;
        int qualifier_offset = 0, qualifier_length = 0;
        int value_offset = 0, value_length = 0;
        long timestamp = SerializedCellsFlag.AUTO_ASSIGN;
        int offset = 0;
        if (has_timestamp) {
            timestamp = Long.parseLong(m_line.decode(byte_array, 0, tab_pos));
            offset = tab_pos + 1;
        }
        row_offset = offset;
        tab_pos = m_line.find(tab_str, offset);
        row_length = tab_pos - row_offset;
        offset = tab_pos + 1;
        family_offset = offset;
        tab_pos = m_line.find(tab_str, offset);
        for (int i = family_offset; i < tab_pos; i++) {
            if (byte_array[i] == ':' && qualifier_offset == 0) {
                family_length = i - family_offset;
                qualifier_offset = i + 1;
            }
        }
        // no qualifier
        if (qualifier_offset == 0)
            family_length = tab_pos - family_offset;
        else
            qualifier_length = tab_pos - qualifier_offset;
        offset = tab_pos + 1;
        value_offset = offset;
        value_length = len - value_offset;
        if (!mCellsWriter.add(byte_array, row_offset, row_length, byte_array, family_offset, family_length,
                byte_array, qualifier_offset, qualifier_length, timestamp, byte_array, value_offset,
                value_length, SerializedCellsFlag.FLAG_INSERT)) {
            mClient.mutator_set_cells_serialized(mMutator, mCellsWriter.buffer(), false);
            mCellsWriter.clear();
            if ((row_length + family_length + qualifier_length + value_length + 32) > mCellsWriter.capacity())
                mCellsWriter = new SerializedCellsWriter(
                        row_length + family_length + qualifier_length + value_length + 32);
            if (!mCellsWriter.add(byte_array, row_offset, row_length, byte_array, family_offset, family_length,
                    byte_array, qualifier_offset, qualifier_length, timestamp, byte_array, value_offset,
                    value_length, SerializedCellsFlag.FLAG_INSERT))
                throw new IOException("Unable to add cell to SerializedCellsWriter " + "(row='"
                        + new String(byte_array, row_offset, row_length, "UTF-8") + "'");
        }
    } catch (Exception e) {
        log.error(e);
        throw new IOException("Unable to write cell - " + e.toString());
    }
}
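The parsing above leans on Text.find, which scans the underlying bytes and returns the byte offset of the first match at or after the given start position, or -1 when absent; a minimal sketch:

import org.apache.hadoop.io.Text;

Text line = new Text("row\tfam:qual\tvalue");
int first = line.find("\t");             // 3
int second = line.find("\t", first + 1); // 12
int missing = line.find("\u0000");       // -1

Because find works on bytes rather than chars, its offsets can be fed directly back into getBytes()-based slicing, exactly as the writer above does.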
From source file: org.loggo.search.cli.Search.java
License: Apache License

public void query() throws Exception {
    BatchScanner bs = conn.createBatchScanner(opts.table, Authorizations.EMPTY, 8);
    try {
        // Compute the user's date range, if any
        SimpleDateFormat sdf = new SimpleDateFormat(LogEntry.DATE_FORMAT);
        String startDate = "";
        if (opts.start != null) {
            startDate = sdf.format(new Date(opts.start));
        }
        String endDate = "9999";
        if (opts.end != null) {
            endDate = sdf.format(new Date(opts.end));
        }
        if (opts.start != null || opts.end != null) {
            // Set the date ranges for each shard
            List<Range> ranges = new ArrayList<>(Schema.SHARDS);
            for (int i = 0; i < Schema.SHARDS; i++) {
                Range r = new Range(String.format(ROW_FORMAT, i, startDate),
                        String.format(ROW_FORMAT, i, endDate));
                ranges.add(r);
            }
            bs.setRanges(ranges);
        } else {
            // full table scan
            bs.setRanges(Collections.singletonList(new Range()));
        }
        // Set the filter for applications and host
        int priority = 100;
        if (!opts.hosts.isEmpty() || !opts.applications.isEmpty()) {
            IteratorSetting is = new IteratorSetting(priority++, HostAndApplicationFilter.class);
            HostAndApplicationFilter.setApps(is, opts.applications);
            HostAndApplicationFilter.setHosts(is, opts.hosts);
            bs.addScanIterator(is);
        }
        // stack the iterators for multiple terms: each term must match to return results
        List<String> families = Arrays.asList(Schema.FAMILIES);
        if (!opts.terms.isEmpty()) {
            for (int i = 0; i < opts.terms.size(); i++) {
                String term = opts.terms.get(i);
                IteratorSetting is;
                if (opts.regexp) {
                    is = new IteratorSetting(priority++, RegExFilter.class);
                    RegExFilter.setRegexs(is, null, null, null, term, false);
                } else {
                    is = new IteratorSetting(priority++, "name" + i, GrepValueFilter.class);
                    GrepValueFilter.setTerm(is, term);
                    if (families.contains(term)) {
                        bs.fetchColumnFamily(new Text(term));
                    }
                }
                bs.addScanIterator(is);
            }
        }
        // Just get the count: don't bother returning whole records
        if (opts.count) {
            IteratorSetting is = new IteratorSetting(priority++, CountingIterator.class);
            bs.addScanIterator(is);
            long total = 0;
            for (Entry<Key, Value> entry : bs) {
                total += Long.parseLong(entry.getValue().toString());
            }
            printer.println(total);
            return;
        }
        // Get stats, not logs
        if (opts.duration != null) {
            final long duration = opts.duration;
            SimpleDateFormat fmt = new SimpleDateFormat(LogEntry.DATE_FORMAT);
            // Stats iterator pulls out counts by CF
            IteratorSetting is = new IteratorSetting(priority++, StatsIterator.class);
            StatsIterator.duration(is, opts.duration, TimeUnit.MILLISECONDS);
            bs.addScanIterator(is);
            // Group counts under the right "bucket" of time
            SortedMap<Long, Map<String, Long>> stats = new TreeMap<>();
            for (Entry<Key, Value> entry : bs) {
                Key key = entry.getKey();
                long ts = StatsIterator.getTs(key, fmt);
                // convert to start time for this bucket
                ts -= ts % duration;
                Map<String, Long> byCF = stats.get(ts);
                if (byCF == null) {
                    stats.put(ts, byCF = new TreeMap<>());
                }
                // Add values, by name given a string: "NAME:VALUE,NAME2:VALUE2"
                String value = entry.getValue().toString();
                if (!value.isEmpty()) {
                    String[] nameCounts = value.split(",");
                    for (String nameCount : nameCounts) {
                        String[] parts = nameCount.split(":");
                        Long current = byCF.get(parts[0]);
                        if (current == null) {
                            current = Long.decode(parts[1]);
                        } else {
                            current = Long.decode(parts[1]) + current.longValue();
                        }
                        byCF.put(parts[0], current);
                    }
                }
            }
            if (stats.isEmpty())
                return;
            // Use the range of the data, or a user specified range, if provided
            long start = stats.firstKey();
            long end = stats.lastKey();
            if (opts.start != null) {
                start = opts.start - (opts.start % duration);
            }
            if (opts.end != null) {
                end = opts.end - (opts.end % duration);
            }
            // Print a line for each bucket, even if there's no data
            for (long time = start; time <= end; time += duration) {
                Map<String, Long> byCF = stats.get(time);
                List<String> byCFList = new ArrayList<>();
                if (byCF != null) {
                    for (Entry<String, Long> entry : byCF.entrySet()) {
                        byCFList.add(String.format("%s: %d", entry.getKey(), entry.getValue()));
                    }
                }
                printer.println(String.format("%s\t%s", fmt.format(new Date(time)),
                        Joiner.on(", ").join(byCFList)));
            }
            return;
        }
        // Read the whole list for sorting. Unfortunately this means it has to fit into memory.
        ArrayList<Entry<Key, Value>> results = new ArrayList<Entry<Key, Value>>();
        for (Entry<Key, Value> entry : bs) {
            results.add(entry);
        }
        if (opts.sort || opts.reverse) {
            final int order = opts.reverse ? -1 : 1;
            Collections.sort(results, new Comparator<Entry<Key, Value>>() {
                @Override
                public int compare(Entry<Key, Value> o1, Entry<Key, Value> o2) {
                    Text row = o1.getKey().getRow();
                    Text row2 = o2.getKey().getRow();
                    return order * BytesWritable.Comparator.compareBytes(row.getBytes(), Schema.SHARD_LENGTH,
                            row.getLength() - Schema.SHARD_LENGTH, row2.getBytes(), Schema.SHARD_LENGTH,
                            row2.getLength() - Schema.SHARD_LENGTH);
                }
            });
        }
        for (Entry<Key, Value> entry : results) {
            String cq = entry.getKey().getColumnQualifier().toString();
            String[] parts = cq.split(Schema.APP_HOST_SEPARATOR);
            String row = entry.getKey().getRow().toString();
            String value = entry.getValue().toString();
            printer.println(String.format("%s\t%s\t%s\t%s", row.substring(Schema.SHARD_LENGTH), parts[0],
                    parts[1], value));
        }
    } finally {
        bs.close();
    }
}
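The sort comparator above orders entries by the row bytes after the fixed-width shard prefix, using getBytes() with an explicit offset and length to avoid materializing strings. The same static helper (inherited from WritableComparator) compares any two Text values byte-lexicographically; a minimal sketch:

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparator;

Text a = new Text("2016-01-01 info");
Text b = new Text("2016-01-02 warn");
int cmp = WritableComparator.compareBytes(
        a.getBytes(), 0, a.getLength(),
        b.getBytes(), 0, b.getLength()); // < 0, so a sorts first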
From source file: org.mgrover.hive.translate.GenericUDFTranslate.java
License: Apache License

/**
 * Pre-processes the from and to strings to populate {@link #replacementMap} and
 * {@link #deletionSet}.
 *
 * @param from
 *          from string to be used for translation
 * @param to
 *          to string to be used for translation
 */
private void populateMappings(Text from, Text to) {
    replacementMap.clear();
    deletionSet.clear();
    ByteBuffer fromBytes = ByteBuffer.wrap(from.getBytes(), 0, from.getLength());
    ByteBuffer toBytes = ByteBuffer.wrap(to.getBytes(), 0, to.getLength());
    // Traverse through the from string, one code point at a time
    while (fromBytes.hasRemaining()) {
        // This will also move the iterator ahead by one code point
        int fromCodePoint = Text.bytesToCodePoint(fromBytes);
        // If the to string has more code points, make sure to traverse it too
        if (toBytes.hasRemaining()) {
            int toCodePoint = Text.bytesToCodePoint(toBytes);
            // If the code point from the from string already has a replacement or is to be
            // deleted, we don't need to do anything; just move on to the next code point
            if (replacementMap.containsKey(fromCodePoint) || deletionSet.contains(fromCodePoint)) {
                continue;
            }
            replacementMap.put(fromCodePoint, toCodePoint);
        } else {
            // If the code point from the from string already has a replacement or is to be
            // deleted, we don't need to do anything; just move on to the next code point
            if (replacementMap.containsKey(fromCodePoint) || deletionSet.contains(fromCodePoint)) {
                continue;
            }
            deletionSet.add(fromCodePoint);
        }
    }
}
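The traversal relies on Text.bytesToCodePoint, which decodes one UTF-8 code point from a ByteBuffer and advances its position, so multi-byte characters are handled correctly when walking the raw getBytes() buffer; a minimal sketch:

import java.nio.ByteBuffer;
import org.apache.hadoop.io.Text;

Text t = new Text("héllo"); // 'é' occupies two bytes in UTF-8
ByteBuffer bb = ByteBuffer.wrap(t.getBytes(), 0, t.getLength());
while (bb.hasRemaining()) {
    int cp = Text.bytesToCodePoint(bb); // consumes one whole code point
    System.out.printf("U+%04X%n", cp);  // U+0068, U+00E9, U+006C, U+006C, U+006F
}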
From source file: org.mgrover.hive.translate.GenericUDFTranslate.java
License: Apache License

/**
 * Translates the input string based on {@link #replacementMap} and {@link #deletionSet} and
 * returns the translated string.
 *
 * @param input
 *          input string to perform the translation on
 * @return translated string
 */
private String processInput(Text input) {
    StringBuilder resultBuilder = new StringBuilder();
    // Obtain the byte buffer from the input string so we can traverse it code point by code point
    ByteBuffer inputBytes = ByteBuffer.wrap(input.getBytes(), 0, input.getLength());
    // Traverse the byte buffer containing the input string one code point at a time
    while (inputBytes.hasRemaining()) {
        int inputCodePoint = Text.bytesToCodePoint(inputBytes);
        // If the code point exists in the deletion set, no need to emit anything for it;
        // continue on to the next code point
        if (deletionSet.contains(inputCodePoint)) {
            continue;
        }
        Integer replacementCodePoint = replacementMap.get(inputCodePoint);
        // If a replacement exists for this code point, append the replacement to the
        // output string. If no such replacement exists, emit the original input code point
        char[] charArray = Character
                .toChars((replacementCodePoint != null) ? replacementCodePoint : inputCodePoint);
        resultBuilder.append(charArray);
    }
    String resultString = resultBuilder.toString();
    return resultString;
}
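Together, the two helpers give SQL-style translate() semantics: from-characters map positionally onto to-characters, and from-characters with no counterpart are deleted. For example (hypothetical direct calls for illustration; the real methods are private to the UDF):

populateMappings(new Text("el"), new Text("ip"));
String out = processInput(new Text("hello")); // "hippo": e→i, l→p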
From source file: org.mitre.ccv.mapred.CalculateCosineDistanceMatrix.java
License: Open Source License

public void writeTextBytes(FSDataOutputStream fos, Text text) throws IOException {
    // Note: frame with getLength(), not getBytes().length -- the backing array
    // returned by getBytes() may be longer than the valid payload.
    byte[] bytes = text.getBytes();
    int length = text.getLength();
    fos.writeInt(length);
    fos.write(bytes, 0, length);
}
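A matching read side for this int-length framing might look like the following sketch (readTextBytes is a hypothetical counterpart for illustration, not taken from the source file):

public Text readTextBytes(FSDataInputStream fis) throws IOException {
    int length = fis.readInt();
    byte[] bytes = new byte[length];
    fis.readFully(bytes);
    Text text = new Text();
    text.set(bytes, 0, length); // copies the bytes into the Text's own buffer
    return text;
}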
From source file: org.mrgeo.data.accumulo.utils.AccumuloUtils.java
License: Apache License

/**
 * Converts a Text object holding a tileId back to a long.
 *
 * @param rowId Text object to convert.
 * @return the long value from the Text object.
 */
public static long toLong(Text rowId) {
    byte[] outB = new byte[8];
    for (int x = 0; x < outB.length; x++) {
        if (x >= rowId.getLength()) {
            outB[x] = 0x0; // zero-pad if the row holds fewer than 8 bytes
        } else {
            outB[x] = rowId.getBytes()[x];
        }
    }
    return ByteBuffer.wrap(outB).getLong();
}
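For context, the opposite direction packs the tileId's eight big-endian bytes into a Text row key; a minimal sketch (toText is a hypothetical helper for illustration, not confirmed to exist in AccumuloUtils):

public static Text toText(long tileId) {
    byte[] bytes = ByteBuffer.allocate(8).putLong(tileId).array();
    Text text = new Text();
    text.set(bytes, 0, bytes.length); // Text can carry arbitrary bytes, not just UTF-8
    return text;
}

Round-tripping then holds: toLong(toText(42L)) == 42L.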
From source file: org.mrgeo.vector.mrsvector.OSMTileIngester.java
License: Apache License

static byte[] getBytes(final Text text) {
    byte[] bytes = text.getBytes();
    if (text.getLength() == bytes.length) {
        return bytes;
    }
    return Arrays.copyOf(bytes, text.getLength());
}
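A usage sketch: this helper is the defensive-copy pattern for when the bytes must outlive a mutable, reused Text instance (recent Hadoop releases also ship an equivalent Text#copyBytes(), worth preferring where available):

Text reused = new Text("row-0001");
byte[] safe = getBytes(reused); // exact-length copy, detached from the backing buffer
reused.set("row-0002");         // does not affect 'safe'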