List of usage examples for org.apache.hadoop.io.Text.toString()
@Override
public String toString()
From source file:com.bizosys.hsearch.kv.indexing.KVReducerBase.java
License:Apache License
public byte[] indexTextSet(boolean skipSingle, byte[] existingData, Iterable<Text> values, boolean isAnalyzed, String fieldName) throws IOException { byte[] finalData = null; int containerKey = 0; int containervalue = 0; String[] resultValue = new String[2]; HSearchTableKVIndex table = new HSearchTableKVIndex(); if (null != existingData) { throw new IOException("Append is not supported for the non repetable analyzed fields"); }//from w w w. ja v a2 s.c o m String line = null; int docType = 1; int fieldType = 1; String metaDoc = "-"; boolean flag = true; records.clear(); for (Text text : values) { if (null == text) continue; Arrays.fill(resultValue, null); line = text.toString(); LineReaderUtil.fastSplit(resultValue, line, KVIndexer.FIELD_SEPARATOR); containerKey = Integer.parseInt(resultValue[0]); if (null == resultValue[1]) continue; containervalue = Integer.parseInt(resultValue[1]); table.put(docType, fieldType, metaDoc, containervalue, containerKey, flag); if (skipSingle) { if (records.size() < 2) records.add(containerKey); } } if (skipSingle && records.size() < 2) return null; finalData = table.toBytes(); return finalData; }
From source file:com.bizosys.hsearch.kv.indexing.KVReducerBase.java
License:Apache License
/**
 * Merges the supplied document ids into a bitset (optionally seeded from previously
 * stored bytes) and serializes the result.
 *
 * @param skipSingle   when {@code true}, {@code null} is returned if fewer than two bits
 *                     are set in the merged bitset
 * @param existingData previously stored bytes to merge into; {@code null} or empty means
 *                     start from an empty bitset
 * @param values       ids to set; each non-null value is parsed as an integer
 * @param isAnalyzed   not read by this method
 * @param fieldName    not read by this method
 * @param isCompressed whether {@code existingData} is Snappy-compressed and whether the
 *                     compressed serializer is used for the output
 * @return the serialized bitset, or {@code null} when skipped (or when the serializer
 *         itself returns {@code null})
 * @throws IOException declared for callers; propagated from the helpers used here
 */
public byte[] indexTextBitset(boolean skipSingle, byte[] existingData, Iterable<Text> values,
        boolean isAnalyzed, String fieldName, boolean isCompressed) throws IOException {

    BitSetWrapper foundIds;
    if (existingData == null || existingData.length == 0) {
        foundIds = new BitSetWrapper();
    } else {
        byte[] plain = existingData;
        if (isCompressed) {
            plain = Snappy.uncompress(existingData, 0, existingData.length);
        }
        foundIds = SortedBytesBitset.getInstanceBitset().bytesToBitSet(plain, 0, plain.length);
    }

    for (Text idText : values) {
        if (idText != null) {
            foundIds.set(Integer.parseInt(idText.toString()));
        }
    }

    if (skipSingle && foundIds.cardinality() < 2) {
        return null;
    }

    if (isCompressed) {
        return SortedBytesBitsetCompressed.getInstanceBitset().bitSetToBytes(foundIds);
    }
    return SortedBytesBitset.getInstanceBitset().bitSetToBytes(foundIds);
}
From source file:com.bizosys.hsearch.kv.indexing.KVReducerHBase.java
License:Apache License
@Override protected void reduce(TextPair key, Iterable<Text> values, Context context) throws IOException, InterruptedException { String rowKeyP1 = key.getFirst().toString(); String rowKeyP2 = key.getSecond().toString(); int sepIndex = -1; String dataType = null;//www . j a v a 2 s.c om int sourceSeq = 0; Field fld = null; char dataTypeChar = '-'; if (!rowKeyP1.equals(KVIndexer.MERGEKEY_ROW)) { sepIndex = rowKeyP2.indexOf(KVIndexer.FIELD_SEPARATOR); dataType = rowKeyP2.substring(0, sepIndex).toLowerCase(); sourceSeq = Integer.parseInt(rowKeyP2.substring(sepIndex + 1)); fld = fm.sourceSeqWithField.get(sourceSeq); dataTypeChar = KVIndexer.dataTypesPrimitives.get(dataType); } byte[] existingData = null; if (fm.append) { List<NVBytes> cells = HReader.getCompleteRow(fm.tableName, rowKeyP1.getBytes()); if (null != cells) { if (cells.size() > 0) { NVBytes nvBytes = cells.get(0); if (null != nvBytes) existingData = nvBytes.data; } } } StringBuilder rowKeyText = new StringBuilder(rowKeyP1); try { byte[] finalData = reducerUtil.cookBytes(rowKeyText, values, existingData, fld, dataTypeChar); if (null == finalData) return; Put put = new Put(rowKeyText.toString().getBytes()); put.add(KVIndexer.FAM_NAME, KVIndexer.COL_NAME, finalData); context.write(null, put); } catch (NumberFormatException ex) { IdSearchLog.l.fatal("Error at index reducer", ex); Iterator<Text> itr = values.iterator(); while (itr.hasNext()) { Text text = itr.next(); IdSearchLog.l.fatal(text.toString()); } throw new IOException("Error indexing data because " + ex.getMessage()); } }
From source file:com.bizosys.hsearch.kv.indexing.MapFileSizeReader.java
License:Apache License
public static void main(String[] args) { if (args.length < 1) { System.out.println("Usage: " + MapFileSizeReader.class + " <<hdfs-filepath>> <<key>>"); System.exit(1);//from w w w . j ava 2 s. com } String hdfsFilePath = args[0].trim(); String askedKey = null; if (args.length == 2) askedKey = (args[1].trim()); MapFile.Reader reader = null; try { Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(URI.create(hdfsFilePath), conf); reader = new MapFile.Reader(fs, hdfsFilePath, conf); if (null == askedKey) { Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf); BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf); while (reader.next(key, value)) { if (null == value) System.out.println(key.toString() + "\t0"); System.out.println(key.toString() + "\t" + value.getLength()); } } else { Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf); key.set(askedKey.getBytes()); BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf); reader.get(key, value); System.out.println(key.toString() + "\t" + value.getLength()); } } catch (Exception e) { System.err.println("Error in reading from HDFSFilepath:" + hdfsFilePath); e.printStackTrace(System.out); } finally { IOUtils.closeStream(reader); } }
From source file:com.blackberry.logdriver.mapred.boom.BoomFilterMapper.java
License:Apache License
@Override public void map(LogLineData key, Text value, OutputCollector<LogLineData, Text> output, Reporter reporter) throws IOException { if (filters == null || filters.size() == 0) { throw new IOException("No filters found."); }//from ww w . j av a 2s . c o m Filter thisFilter = null; for (int i = 0; i < filters.size(); i++) { thisFilter = filters.get(i); // Check for a match if (thisFilter.accept(value.toString())) { output.collect(key, value); // Reorder the filters, if necessary. Filter tmpFilter; while (i > 0 && thisFilter.getNumMatches() > filters.get(i - 1).getNumMatches()) { // move this filter up.. tmpFilter = filters.get(i - 1); filters.set(i - 1, thisFilter); filters.set(i, tmpFilter); i--; LOG.info("Filter set reordered. Currently: {}", filters); } // Stop processing filters. break; } } }
From source file:com.blackberry.logdriver.mapred.boom.ReBoomRecordWriter.java
License:Apache License
@Override public void write(LogLineData key, Text value) throws IOException { writer.writeLine(key.getTimestamp(), value.toString(), key.getEventId(), key.getCreateTime(), key.getBlockNumber());//from ww w.ja v a 2s . c o m }
From source file:com.blackberry.logdriver.mapreduce.boom.BoomFilterMapper.java
License:Apache License
@Override protected void map(LogLineData key, Text value, Context context) throws IOException, InterruptedException { if (filters.size() == 0) { throw new IOException("No filters found."); }//w w w . j av a 2 s .co m Filter thisFilter = null; for (int i = 0; i < filters.size(); i++) { thisFilter = filters.get(i); // Check for a match if (thisFilter.accept(value.toString())) { context.write(key, value); // Reorder the filters, if necessary. Filter tmpFilter; while (i > 0 && thisFilter.getNumMatches() > filters.get(i - 1).getNumMatches()) { // move this filter up.. tmpFilter = filters.get(i - 1); filters.set(i - 1, thisFilter); filters.set(i, tmpFilter); i--; LOG.info("Filter set reordered. Currently: {}", filters); } // Stop processing filters. break; } } }
From source file:com.boozallen.cognition.lens.Column.java
License:Apache License
/**
 * Initialize a column.
 *
 * <p>Convenience constructor: converts both {@code Text} arguments with
 * {@code toString()} and delegates to another constructor of this class.
 *
 * @param source -- the source of the data
 * @param cf -- the column family in accumulo
 * @param cq -- the column qualifier in accumulo
 */
public Column(Source source, Text cf, Text cq) {
    this(source, cf.toString(), cq.toString());
}
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.ParallelCountingMapper.java
License:Apache License
@Override protected void map(LongWritable offset, Text input, Context context) throws IOException, InterruptedException { String[] items = splitter.split(input.toString()); Set<String> uniqueItems = Sets.newHashSet(Arrays.asList(items)); for (String item : uniqueItems) { if (item.trim().isEmpty()) { continue; }//from w ww .j av a 2s . c om context.setStatus("Parallel Counting Mapper: " + item); context.write(new Text(item), ONE); } }
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.ParallelFPGrowthMapper.java
License:Apache License
@Override protected void map(LongWritable offset, Text input, Context context) throws IOException, InterruptedException { String[] items = splitter.split(input.toString()); OpenIntHashSet itemSet = new OpenIntHashSet(); for (String item : items) { if (fMap.containsKey(item) && !item.trim().isEmpty()) { itemSet.add(fMap.get(item)); }/*from w w w . j ava2 s . co m*/ } IntArrayList itemArr = new IntArrayList(itemSet.size()); itemSet.keys(itemArr); itemArr.sort(); OpenIntHashSet groups = new OpenIntHashSet(); for (int j = itemArr.size() - 1; j >= 0; j--) { // generate group dependent shards int item = itemArr.get(j); int groupID = PFPGrowth.getGroup(item, maxPerGroup); if (!groups.contains(groupID)) { IntArrayList tempItems = new IntArrayList(j + 1); tempItems.addAllOfFromTo(itemArr, 0, j); context.setStatus("Parallel FPGrowth: Generating Group Dependent transactions for: " + item); wGroupID.set(groupID); context.write(wGroupID, new TransactionTree(tempItems, 1L)); } groups.add(groupID); } }