List of usage examples for org.apache.hadoop.io Text toString
@Override
public String toString()
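The examples below show common patterns for converting a Hadoop Text value back to a Java String. As a minimal sketch before the real-world examples (the class name LineTokenMapper and the whitespace-splitting logic are illustrative assumptions, not taken from any of the source files listed here), a mapper typically calls toString() on the incoming value before tokenizing it:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper: converts each input line to a String before tokenizing it.
public class LineTokenMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    private final Text word = new Text();
    private final LongWritable one = new LongWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Text.toString() decodes the underlying UTF-8 bytes into a java.lang.String
        String line = value.toString();
        for (String token : line.split("\\s+")) {
            if (!token.isEmpty()) {
                word.set(token);
                context.write(word, one);
            }
        }
    }
}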
From source file:com.examples.ch03.PageViewMapper.java
@Override
protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
    String[] tokens = value.toString().split("\t");
    if (tokens.length > 3) {
        String page = tokens[2];
        String ip = tokens[0];
        first.set(page);
        second.set(ip);
        compositeKey.setFirst(first);
        compositeKey.setSecond(second);
        outputValue.set(ip);
        context.write(compositeKey, outputValue);
    }
}
From source file:com.facebook.hive.orc.lazy.OrcLazyStringObjectInspector.java
License:Open Source License
@Override
public String getPrimitiveJavaObject(Object o) {
    Text text = getPrimitiveWritableObject(o);
    return text == null ? null : text.toString();
}
From source file:com.facebook.presto.accumulo.examples.TpcHClerkSearch.java
License:Apache License
@Override
public int run(AccumuloConfig config, CommandLine cmd) throws Exception {
    String[] searchTerms = cmd.getOptionValues(CLERK_ID);
    ZooKeeperInstance inst = new ZooKeeperInstance(config.getInstance(), config.getZooKeepers());
    Connector conn = inst.getConnector(config.getUsername(), new PasswordToken(config.getPassword()));

    // Ensure both tables exist
    validateExists(conn, DATA_TABLE);
    validateExists(conn, INDEX_TABLE);

    long start = System.currentTimeMillis();

    // Create a scanner against the index table
    BatchScanner idxScanner = conn.createBatchScanner(INDEX_TABLE, new Authorizations(), 10);
    LinkedList<Range> searchRanges = new LinkedList<Range>();

    // Create a search Range from the command line args
    for (String searchTerm : searchTerms) {
        if (clerkRegex.matcher(searchTerm).matches()) {
            searchRanges.add(new Range(searchTerm));
        } else {
            throw new InvalidParameterException(
                    format("Search term %s does not match regex Clerk#[0-9]{9}", searchTerm));
        }
    }

    // Set the search ranges for our scanner
    idxScanner.setRanges(searchRanges);

    // A list to hold all of the order IDs
    LinkedList<Range> orderIds = new LinkedList<Range>();

    // Process all of the records returned by the batch scanner
    for (Map.Entry<Key, Value> record : idxScanner) {
        // Get the order ID and add it to the list of order IDs
        orderIds.add(new Range(record.getKey().getColumnQualifier()));
    }

    // Close the batch scanner
    idxScanner.close();

    // If no orders were found, log a message and return
    if (orderIds.isEmpty()) {
        System.out.println("Found no orders with the given Clerk ID(s)");
        return 0;
    } else {
        System.out.println(format("Searching data table for %d orders", orderIds.size()));
    }

    // Initialize the batch scanner to scan the data table with
    // the previously found order IDs as the ranges
    BatchScanner dataScanner = conn.createBatchScanner(DATA_TABLE, new Authorizations(), 10);
    dataScanner.setRanges(orderIds);
    dataScanner.addScanIterator(new IteratorSetting(1, WholeRowIterator.class));

    Text row = new Text();     // The row ID
    Text colQual = new Text(); // The column qualifier of the current record

    Long orderkey = null;
    Long custkey = null;
    String orderstatus = null;
    Double totalprice = null;
    Date orderdate = null;
    String orderpriority = null;
    String clerk = null;
    Long shippriority = null;
    String comment = null;

    int numOrders = 0;

    // Process all of the records returned by the batch scanner
    for (Map.Entry<Key, Value> entry : dataScanner) {
        entry.getKey().getRow(row);
        orderkey = decode(Long.class, row.getBytes(), row.getLength());

        SortedMap<Key, Value> rowMap = WholeRowIterator.decodeRow(entry.getKey(), entry.getValue());
        for (Map.Entry<Key, Value> record : rowMap.entrySet()) {
            // Get the column qualifier from the record's key
            record.getKey().getColumnQualifier(colQual);

            switch (colQual.toString()) {
            case CUSTKEY_STR:
                custkey = decode(Long.class, record.getValue().get());
                break;
            case ORDERSTATUS_STR:
                orderstatus = decode(String.class, record.getValue().get());
                break;
            case TOTALPRICE_STR:
                totalprice = decode(Double.class, record.getValue().get());
                break;
            case ORDERDATE_STR:
                orderdate = decode(Date.class, record.getValue().get());
                break;
            case ORDERPRIORITY_STR:
                orderpriority = decode(String.class, record.getValue().get());
                break;
            case CLERK_STR:
                clerk = decode(String.class, record.getValue().get());
                break;
            case SHIPPRIORITY_STR:
                shippriority = decode(Long.class, record.getValue().get());
                break;
            case COMMENT_STR:
                comment = decode(String.class, record.getValue().get());
                break;
            default:
                throw new RuntimeException("Unknown column qualifier " + colQual);
            }
        }

        ++numOrders;

        // Write the order fields to stdout
        System.out.println(format("%d|%d|%s|%f|%s|%s|%s|%d|%s", orderkey, custkey, orderstatus, totalprice,
                orderdate, orderpriority, clerk, shippriority, comment));

        custkey = null;
        shippriority = null;
        orderstatus = null;
        orderpriority = null;
        clerk = null;
        comment = null;
        totalprice = null;
        orderdate = null;
    }

    // Close the batch scanner
    dataScanner.close();

    long finish = System.currentTimeMillis();
    System.out.format("Found %d orders in %s ms\n", numOrders, (finish - start));
    return 0;
}
From source file:com.facebook.presto.accumulo.index.Indexer.java
License:Apache License
/**
 * Gets a set of locality groups that should be added to the index table (not the metrics table).
 *
 * @param table Table for the locality groups, see AccumuloClient#getTable
 * @return Mapping of locality group to column families in the locality group, 1:1 mapping in this case
 */
public static Map<String, Set<Text>> getLocalityGroups(AccumuloTable table) {
    Map<String, Set<Text>> groups = new HashMap<>();

    // For each indexed column
    for (AccumuloColumnHandle columnHandle : table.getColumns().stream()
            .filter(AccumuloColumnHandle::isIndexed).collect(Collectors.toList())) {
        // Create a Text version of the index column family
        Text indexColumnFamily = new Text(getIndexColumnFamily(columnHandle.getFamily().get().getBytes(UTF_8),
                columnHandle.getQualifier().get().getBytes(UTF_8)).array());

        // Add this to the locality groups;
        // it is a 1:1 mapping of locality group to column families
        groups.put(indexColumnFamily.toString(), ImmutableSet.of(indexColumnFamily));
    }
    return groups;
}
From source file:com.facebook.presto.accumulo.tools.RewriteMetricsTask.java
License:Apache License
private void rewriteMetrics(Connector connector, AccumuloTable table, long start) {
    LOG.info("Rewriting metrics for table " + table.getFullTableName());

    TypedValueCombiner.Encoder<Long> encoder = new LongCombiner.StringEncoder();
    BatchWriter writer = null;
    Scanner scanner = null;
    try {
        writer = connector.createBatchWriter(table.getIndexTableName() + "_metrics", bwc);
        LOG.info("Created batch writer against " + table.getIndexTableName() + "_metrics");

        scanner = new IsolatedScanner(connector.createScanner(table.getIndexTableName(), auths));
        LOG.info(format("Created isolated scanner against %s with auths %s", table.getIndexTableName(), auths));

        Set<Pair<String, String>> timestampColumns = table.isTruncateTimestamps()
                ? table.getColumns().stream()
                        .filter(x -> x.getType().equals(TimestampType.TIMESTAMP) && x.getFamily().isPresent())
                        .map(x -> Pair.of(x.getFamily().get(), x.getQualifier().get()))
                        .collect(Collectors.toSet())
                : ImmutableSet.of();

        LOG.info("Timestamp columns are " + timestampColumns);

        IteratorSetting timestampFilter = new IteratorSetting(21, "timestamp", TimestampFilter.class);
        TimestampFilter.setRange(timestampFilter, 0L, start);
        scanner.addScanIterator(timestampFilter);

        Map<Text, Map<Text, Map<ColumnVisibility, AtomicLong>>> rowMap = new HashMap<>();

        long numMutations = 0L;
        boolean warned = true;
        Text prevRow = null;
        for (Entry<Key, Value> entry : scanner) {
            Text row = entry.getKey().getRow();
            Text cf = entry.getKey().getColumnFamily();

            if (prevRow != null && !prevRow.equals(row)) {
                writeMetrics(start, encoder, writer, rowMap);
                ++numMutations;

                if (numMutations % 500000 == 0) {
                    if (dryRun) {
                        LOG.info(format("In progress, would have written %s metric mutations", numMutations));
                    } else {
                        LOG.info("In progress, metric mutations written: " + numMutations);
                    }
                }
            }

            ColumnVisibility visibility = entry.getKey().getColumnVisibilityParsed();
            incrementMetric(rowMap, row, cf, visibility);

            String[] famQual = cf.toString().split("_");
            if (famQual.length == 2) {
                if (timestampColumns.contains(Pair.of(famQual[0], famQual[1]))) {
                    incrementTimestampMetric(rowMap, cf, visibility, row);
                }
            } else if (warned) {
                LOG.warn("Unable to re-write timestamp metric when either of a family/qualifier column mapping contains an underscore");
                warned = false;
            }

            if (prevRow == null) {
                prevRow = new Text(row);
            } else {
                prevRow.set(row);
            }
        }

        // Write final metric
        writeMetrics(start, encoder, writer, rowMap);
        ++numMutations;

        if (dryRun) {
            LOG.info(format("Would have written %s mutations", numMutations));
        } else {
            LOG.info("Finished rewriting metrics. Mutations written: " + numMutations);
        }
    } catch (TableNotFoundException e) {
        LOG.error("Table not found, must have been deleted during process", e);
    } catch (MutationsRejectedException e) {
        LOG.error("Server rejected mutations", e);
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (MutationsRejectedException e) {
                LOG.error("Server rejected mutations", e);
            }
        }
        if (scanner != null) {
            scanner.close();
        }
    }
}
From source file:com.finderbots.miner.RegexUrlFilter.java
License:Apache License
public static List<String> getUrlFilterPatterns(String urlFiltersFile)
        throws IOException, InterruptedException {
    // This reads regex filters from a file in HDFS or the native file system
    JobConf conf = HadoopUtils.getDefaultJobConf();
    Path filterFile = new Path(urlFiltersFile);
    FileSystem fs = filterFile.getFileSystem(conf);
    List<String> filterList = new ArrayList<String>();
    if (fs.exists(filterFile)) {
        FSDataInputStream in = fs.open(filterFile);
        LineReader reader = new LineReader(in);
        Text tLine = new Text();
        while (reader.readLine(tLine) > 0) {
            String line = tLine.toString();
            if (StringUtils.isNotBlank(line)
                    && (line.startsWith(INCLUDE_CHAR) || line.startsWith(EXCLUDE_CHAR))) {
                filterList.add(line.trim());
            }
        }
        in.close();
    }
    return filterList;
}
From source file:com.flytxt.yesbank.mapper.HdfsEngineMapper.java
License:Open Source License
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
    String[] hdfsDataArray = value.toString().split(",");
    for (String val : hdfsDataArray) {
        System.out.println(" hdfs data values :" + val);
    }

    List<TagInfoBean> hbaseStoreTagValueList = new ArrayList<>();
    StringTokenizer itr = new StringTokenizer(value.toString(), ",");
    Configuration conf = context.getConfiguration();

    // Capture the token count up front; countTokens() shrinks as tokens are consumed
    int tokenCount = itr.countTokens();
    System.out.println("no of tokens " + tokenCount);
    for (int i = 0; i < tokenCount; i++) {
        System.out.println(" next token values " + itr.nextToken());
    }

    String customerIdAsRowkey = null;
    for (int ii = 0; ii < hdfsDataArray.length; ii++) {
        if (ii == 0) {
            customerIdAsRowkey = hdfsDataArray[ii];
            continue;
        }
        if (tagInfoMap.containsKey(ii)) {
            tagInfoBean = tagInfoMap.get(ii);
            tagInfoBean.setTagNameValue_hdfs(hdfsDataArray[ii]);
            tagInfoBean.setCustomerIdRowKey_hfds(customerIdAsRowkey);
            hbaseStoreTagValueList.add(tagInfoBean);
        } else {
            System.out.format(" Tag Info Header key is not available for the column %d%n", ii);
        }
    }

    // Create / update the HBase table rows
    Configuration hbaseConfig = HBaseConfiguration.create();
    HBaseAdmin admin = new HBaseAdmin(hbaseConfig);

    for (TagInfoBean tagInfo : hbaseStoreTagValueList) {
        HTable hTable = new HTable(hbaseConfig, tagInfo.getTableName());
        Put p = new Put(Bytes.toBytes(tagInfo.getCustomerIdRowKey_hfds()));
        p.add(Bytes.toBytes(tagInfo.getColumnFamily()), Bytes.toBytes(tagInfo.getTagName()),
                Bytes.toBytes(tagInfo.getTagNameValue_hdfs()));
        hTable.put(p);
        System.out.println("HBase data inserted successfully ");
    }
}
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHFileOutputFormat.java
License:Apache License
@Override
public RecordWriter getHiveRecordWriter(final JobConf jc, final Path finalOutPath,
        Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
        final Progressable progressable) throws IOException {

    // Read configuration for the target path, first from jobconf, then from table properties
    String hfilePath = getFamilyPath(jc, tableProperties);
    if (hfilePath == null) {
        throw new RuntimeException("Please set " + HFILE_FAMILY_PATH + " to target location for HFiles");
    }

    // Target path's last component is also the column family name.
    final Path columnFamilyPath = new Path(hfilePath);
    final String columnFamilyName = columnFamilyPath.getName();
    final byte[] columnFamilyNameBytes = Bytes.toBytes(columnFamilyName);
    final Job job = new Job(jc);
    setCompressOutput(job, isCompressed);
    setOutputPath(job, finalOutPath);

    // Create the HFile writer
    final org.apache.hadoop.mapreduce.TaskAttemptContext tac = ShimLoader.getHadoopShims()
            .newTaskAttemptContext(job.getConfiguration(), progressable);

    final Path outputdir = FileOutputFormat.getOutputPath(tac);
    final org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable, KeyValue> fileWriter = getFileWriter(tac);

    // Individual columns are going to be pivoted to HBase cells,
    // and for each row, they need to be written out in order
    // of column name, so sort the column names now, creating a
    // mapping to their column position. However, the first
    // column is interpreted as the row key.
    String columnList = tableProperties.getProperty("columns");
    String[] columnArray = columnList.split(",");
    final SortedMap<byte[], Integer> columnMap = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
    int i = 0;
    for (String columnName : columnArray) {
        if (i != 0) {
            columnMap.put(Bytes.toBytes(columnName), i);
        }
        ++i;
    }

    return new RecordWriter() {
        @Override
        public void close(boolean abort) throws IOException {
            try {
                fileWriter.close(null);
                if (abort) {
                    return;
                }
                // Move the hfiles file(s) from the task output directory to the
                // location specified by the user.
                FileSystem fs = outputdir.getFileSystem(jc);
                fs.mkdirs(columnFamilyPath);
                Path srcDir = outputdir;
                for (;;) {
                    FileStatus[] files = fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER);
                    if ((files == null) || (files.length == 0)) {
                        throw new IOException("No family directories found in " + srcDir);
                    }
                    if (files.length != 1) {
                        throw new IOException("Multiple family directories found in " + srcDir);
                    }
                    srcDir = files[0].getPath();
                    if (srcDir.getName().equals(columnFamilyName)) {
                        break;
                    }
                }
                for (FileStatus regionFile : fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER)) {
                    fs.rename(regionFile.getPath(), new Path(columnFamilyPath, regionFile.getPath().getName()));
                }
                // Hive actually wants a file as task output (not a directory), so
                // replace the empty directory with an empty file to keep it happy.
                fs.delete(outputdir, true);
                fs.createNewFile(outputdir);
            } catch (InterruptedException ex) {
                throw new IOException(ex);
            }
        }

        private void writeText(Text text) throws IOException {
            // Decompose the incoming text row into fields.
            String s = text.toString();
            String[] fields = s.split("\u0001");
            assert (fields.length <= (columnMap.size() + 1));

            // First field is the row key.
            byte[] rowKeyBytes = Bytes.toBytes(fields[0]);

            // Remaining fields are cells addressed by column name within row.
            for (Map.Entry<byte[], Integer> entry : columnMap.entrySet()) {
                byte[] columnNameBytes = entry.getKey();
                int iColumn = entry.getValue();
                String val;
                if (iColumn >= fields.length) {
                    // trailing blank field
                    val = "";
                } else {
                    val = fields[iColumn];
                    if ("\\N".equals(val)) {
                        // omit nulls
                        continue;
                    }
                }
                byte[] valBytes = Bytes.toBytes(val);
                KeyValue kv = new KeyValue(rowKeyBytes, columnFamilyNameBytes, columnNameBytes, valBytes);
                try {
                    fileWriter.write(null, kv);
                } catch (IOException e) {
                    LOG.error("Failed while writing row: " + s);
                    throw e;
                } catch (InterruptedException ex) {
                    throw new IOException(ex);
                }
            }
        }

        private void writePut(PutWritable put) throws IOException {
            ImmutableBytesWritable row = new ImmutableBytesWritable(put.getPut().getRow());
            SortedMap<byte[], List<Cell>> cells = put.getPut().getFamilyCellMap();
            for (Map.Entry<byte[], List<Cell>> entry : cells.entrySet()) {
                Collections.sort(entry.getValue(), new CellComparator());
                for (Cell c : entry.getValue()) {
                    try {
                        fileWriter.write(row, KeyValueUtil.copyToNewKeyValue(c));
                    } catch (InterruptedException e) {
                        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
                    }
                }
            }
        }

        @Override
        public void write(Writable w) throws IOException {
            if (w instanceof Text) {
                writeText((Text) w);
            } else if (w instanceof PutWritable) {
                writePut((PutWritable) w);
            } else {
                throw new IOException("Unexpected writable " + w);
            }
        }
    };
}
From source file:com.github.joshelser.YcsbBatchScanner.java
License:Apache License
private List<Range> computeRanges() throws Exception {
    List<Text> rows = computeAllRows();
    log.info("Calculated all rows: Found {} rows", rows.size());
    Collections.shuffle(rows);
    log.info("Shuffled all rows");
    LinkedList<Range> ranges = new LinkedList<>();
    Random rand = new Random();
    for (Text row : rows.subList(0, this.numRanges)) {
        // For each row, pick a random column qualifier
        ranges.add(Range.exact(row.toString(), "ycsb", "field" + rand.nextInt(10)));
    }
    return ranges;
}
From source file:com.github.seqware.queryengine.plugins.contribs.DonorsToMutationsAndGenesAggregationPlugin.java
License:Open Source License
@Override
public void reduce(Text key, Iterable<Text> values, ReducerInterface<Text, Text> reducerInterface) {
    // key is feature set, value is mutation->gene that can just be cat'd
    Text newVal = new Text();
    StringBuilder newValSB = new StringBuilder();
    newValSB.append(key).append("\t");
    boolean first = true;
    for (Text val : values) {
        if (first) {
            first = false;
        } else {
            newValSB.append(";");
        }
        newValSB.append(val.toString());
    }
    newVal.set(newValSB.toString());
    reducerInterface.write(newVal, null);
}