List of usage examples for org.apache.hadoop.io Text toString
@Override
public String toString()
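The examples below show common patterns for converting a Hadoop Text value back to a Java String. As a minimal sketch before the real-world examples (the class name LineTokenMapper and the whitespace-splitting logic are illustrative assumptions, not taken from any of the source files listed here), a mapper typically calls toString() on the incoming value before tokenizing it:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper: converts each input line to a String before tokenizing it.
public class LineTokenMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    private final Text word = new Text();
    private final LongWritable one = new LongWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Text.toString() decodes the underlying UTF-8 bytes into a java.lang.String
        String line = value.toString();
        for (String token : line.split("\\s+")) {
            if (!token.isEmpty()) {
                word.set(token);
                context.write(word, one);
            }
        }
    }
}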
From source file:com.examples.ch03.PageViewMapper.java
@Override
protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
    String[] tokens = value.toString().split("\t");
    if (tokens.length > 3) {
        String page = tokens[2];
        String ip = tokens[0];
        first.set(page);
        second.set(ip);
        compositeKey.setFirst(first);
        compositeKey.setSecond(second);
        outputValue.set(ip);
        context.write(compositeKey, outputValue);
    }
}
From source file:com.facebook.hive.orc.lazy.OrcLazyStringObjectInspector.java
License:Open Source License
@Override
public String getPrimitiveJavaObject(Object o) {
    Text text = getPrimitiveWritableObject(o);
    return text == null ? null : text.toString();
}
From source file:com.facebook.presto.accumulo.examples.TpcHClerkSearch.java
License:Apache License
@Override
public int run(AccumuloConfig config, CommandLine cmd) throws Exception {
    String[] searchTerms = cmd.getOptionValues(CLERK_ID);
    ZooKeeperInstance inst = new ZooKeeperInstance(config.getInstance(), config.getZooKeepers());
    Connector conn = inst.getConnector(config.getUsername(), new PasswordToken(config.getPassword()));

    // Ensure both tables exist
    validateExists(conn, DATA_TABLE);
    validateExists(conn, INDEX_TABLE);

    long start = System.currentTimeMillis();

    // Create a scanner against the index table
    BatchScanner idxScanner = conn.createBatchScanner(INDEX_TABLE, new Authorizations(), 10);
    LinkedList<Range> searchRanges = new LinkedList<Range>();

    // Create a search Range from the command line args
    for (String searchTerm : searchTerms) {
        if (clerkRegex.matcher(searchTerm).matches()) {
            searchRanges.add(new Range(searchTerm));
        } else {
            throw new InvalidParameterException(
                    format("Search term %s does not match regex Clerk#[0-9]{9}", searchTerm));
        }
    }

    // Set the search ranges for our scanner
    idxScanner.setRanges(searchRanges);

    // A list to hold all of the order IDs
    LinkedList<Range> orderIds = new LinkedList<Range>();

    // Process all of the records returned by the batch scanner
    for (Map.Entry<Key, Value> record : idxScanner) {
        // Get the order ID and add it to the list of order IDs
        orderIds.add(new Range(record.getKey().getColumnQualifier()));
    }

    // Close the batch scanner
    idxScanner.close();

    // If no orders were found, log a message and return
    if (orderIds.isEmpty()) {
        System.out.println("Found no orders with the given Clerk ID(s)");
        return 0;
    } else {
        System.out.println(format("Searching data table for %d orders", orderIds.size()));
    }

    // Initialize the batch scanner to scan the data table with
    // the previously found order IDs as the ranges
    BatchScanner dataScanner = conn.createBatchScanner(DATA_TABLE, new Authorizations(), 10);
    dataScanner.setRanges(orderIds);
    dataScanner.addScanIterator(new IteratorSetting(1, WholeRowIterator.class));

    Text row = new Text();     // The row ID
    Text colQual = new Text(); // The column qualifier of the current record

    Long orderkey = null;
    Long custkey = null;
    String orderstatus = null;
    Double totalprice = null;
    Date orderdate = null;
    String orderpriority = null;
    String clerk = null;
    Long shippriority = null;
    String comment = null;

    int numOrders = 0;

    // Process all of the records returned by the batch scanner
    for (Map.Entry<Key, Value> entry : dataScanner) {
        entry.getKey().getRow(row);
        orderkey = decode(Long.class, row.getBytes(), row.getLength());

        SortedMap<Key, Value> rowMap = WholeRowIterator.decodeRow(entry.getKey(), entry.getValue());
        for (Map.Entry<Key, Value> record : rowMap.entrySet()) {
            // Get the column qualifier from the record's key
            record.getKey().getColumnQualifier(colQual);

            switch (colQual.toString()) {
            case CUSTKEY_STR:
                custkey = decode(Long.class, record.getValue().get());
                break;
            case ORDERSTATUS_STR:
                orderstatus = decode(String.class, record.getValue().get());
                break;
            case TOTALPRICE_STR:
                totalprice = decode(Double.class, record.getValue().get());
                break;
            case ORDERDATE_STR:
                orderdate = decode(Date.class, record.getValue().get());
                break;
            case ORDERPRIORITY_STR:
                orderpriority = decode(String.class, record.getValue().get());
                break;
            case CLERK_STR:
                clerk = decode(String.class, record.getValue().get());
                break;
            case SHIPPRIORITY_STR:
                shippriority = decode(Long.class, record.getValue().get());
                break;
            case COMMENT_STR:
                comment = decode(String.class, record.getValue().get());
                break;
            default:
                throw new RuntimeException("Unknown column qualifier " + colQual);
            }
        }

        ++numOrders;

        // Write the order fields to stdout
        System.out.println(format("%d|%d|%s|%f|%s|%s|%s|%d|%s", orderkey, custkey, orderstatus, totalprice,
                orderdate, orderpriority, clerk, shippriority, comment));

        custkey = null;
        shippriority = null;
        orderstatus = null;
        orderpriority = null;
        clerk = null;
        comment = null;
        totalprice = null;
        orderdate = null;
    }

    // Close the batch scanner
    dataScanner.close();

    long finish = System.currentTimeMillis();
    System.out.format("Found %d orders in %s ms\n", numOrders, (finish - start));
    return 0;
}
From source file:com.facebook.presto.accumulo.index.Indexer.java
License:Apache License
/**
 * Gets a set of locality groups that should be added to the index table (not the metrics table).
 *
 * @param table Table for the locality groups, see AccumuloClient#getTable
 * @return Mapping of locality group to column families in the locality group, 1:1 mapping in this case
 */
public static Map<String, Set<Text>> getLocalityGroups(AccumuloTable table) {
    Map<String, Set<Text>> groups = new HashMap<>();

    // For each indexed column
    for (AccumuloColumnHandle columnHandle : table.getColumns().stream()
            .filter(AccumuloColumnHandle::isIndexed).collect(Collectors.toList())) {
        // Create a Text version of the index column family
        Text indexColumnFamily = new Text(getIndexColumnFamily(columnHandle.getFamily().get().getBytes(UTF_8),
                columnHandle.getQualifier().get().getBytes(UTF_8)).array());

        // Add this to the locality groups;
        // it is a 1:1 mapping of locality group to column families
        groups.put(indexColumnFamily.toString(), ImmutableSet.of(indexColumnFamily));
    }
    return groups;
}
From source file:com.facebook.presto.accumulo.tools.RewriteMetricsTask.java
License:Apache License
private void rewriteMetrics(Connector connector, AccumuloTable table, long start) {
    LOG.info("Rewriting metrics for table " + table.getFullTableName());

    TypedValueCombiner.Encoder<Long> encoder = new LongCombiner.StringEncoder();
    BatchWriter writer = null;
    Scanner scanner = null;
    try {
        writer = connector.createBatchWriter(table.getIndexTableName() + "_metrics", bwc);
        LOG.info("Created batch writer against " + table.getIndexTableName() + "_metrics");

        scanner = new IsolatedScanner(connector.createScanner(table.getIndexTableName(), auths));
        LOG.info(format("Created isolated scanner against %s with auths %s", table.getIndexTableName(), auths));

        Set<Pair<String, String>> timestampColumns = table.isTruncateTimestamps()
                ? table.getColumns().stream()
                        .filter(x -> x.getType().equals(TimestampType.TIMESTAMP) && x.getFamily().isPresent())
                        .map(x -> Pair.of(x.getFamily().get(), x.getQualifier().get()))
                        .collect(Collectors.toSet())
                : ImmutableSet.of();

        LOG.info("Timestamp columns are " + timestampColumns);

        IteratorSetting timestampFilter = new IteratorSetting(21, "timestamp", TimestampFilter.class);
        TimestampFilter.setRange(timestampFilter, 0L, start);
        scanner.addScanIterator(timestampFilter);

        Map<Text, Map<Text, Map<ColumnVisibility, AtomicLong>>> rowMap = new HashMap<>();

        long numMutations = 0L;
        boolean warned = true;
        Text prevRow = null;
        for (Entry<Key, Value> entry : scanner) {
            Text row = entry.getKey().getRow();
            Text cf = entry.getKey().getColumnFamily();

            if (prevRow != null && !prevRow.equals(row)) {
                writeMetrics(start, encoder, writer, rowMap);
                ++numMutations;

                if (numMutations % 500000 == 0) {
                    if (dryRun) {
                        LOG.info(format("In progress, would have written %s metric mutations", numMutations));
                    } else {
                        LOG.info("In progress, metric mutations written: " + numMutations);
                    }
                }
            }

            ColumnVisibility visibility = entry.getKey().getColumnVisibilityParsed();
            incrementMetric(rowMap, row, cf, visibility);

            String[] famQual = cf.toString().split("_");
            if (famQual.length == 2) {
                if (timestampColumns.contains(Pair.of(famQual[0], famQual[1]))) {
                    incrementTimestampMetric(rowMap, cf, visibility, row);
                }
            } else if (warned) {
                LOG.warn("Unable to re-write timestamp metric when either of a family/qualifier column mapping contains an underscore");
                warned = false;
            }

            if (prevRow == null) {
                prevRow = new Text(row);
            } else {
                prevRow.set(row);
            }
        }

        // Write final metric
        writeMetrics(start, encoder, writer, rowMap);
        ++numMutations;

        if (dryRun) {
            LOG.info(format("Would have written %s mutations", numMutations));
        } else {
            LOG.info("Finished rewriting metrics. Mutations written: " + numMutations);
        }
    } catch (TableNotFoundException e) {
        LOG.error("Table not found, must have been deleted during process", e);
    } catch (MutationsRejectedException e) {
        LOG.error("Server rejected mutations", e);
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (MutationsRejectedException e) {
                LOG.error("Server rejected mutations", e);
            }
        }
        if (scanner != null) {
            scanner.close();
        }
    }
}
From source file:com.finderbots.miner.RegexUrlFilter.java
License:Apache License
public static List<String> getUrlFilterPatterns(String urlFiltersFile)
        throws IOException, InterruptedException {
    // This reads regex filters from a file in HDFS or the native file system
    JobConf conf = HadoopUtils.getDefaultJobConf();
    Path filterFile = new Path(urlFiltersFile);
    FileSystem fs = filterFile.getFileSystem(conf);
    List<String> filterList = new ArrayList<String>();
    if (fs.exists(filterFile)) {
        FSDataInputStream in = fs.open(filterFile);
        LineReader reader = new LineReader(in);
        Text tLine = new Text();
        while (reader.readLine(tLine) > 0) {
            String line = tLine.toString();
            if (StringUtils.isNotBlank(line)
                    && (line.startsWith(INCLUDE_CHAR) || line.startsWith(EXCLUDE_CHAR))) {
                filterList.add(line.trim());
            }
        }
        in.close();
    }
    return filterList;
}
From source file:com.flytxt.yesbank.mapper.HdfsEngineMapper.java
License:Open Source License
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
    String[] hdfsDataArray = value.toString().split(",");
    for (String val : hdfsDataArray) {
        System.out.println(" hdfs data values :" + val);
    }

    List<TagInfoBean> hbaseStoreTagValueList = new ArrayList<>();
    StringTokenizer itr = new StringTokenizer(value.toString(), ",");
    Configuration conf = context.getConfiguration();

    // Capture the token count up front; countTokens() shrinks as tokens are consumed
    int tokenCount = itr.countTokens();
    System.out.println("no of tokens " + tokenCount);
    for (int i = 0; i < tokenCount; i++) {
        System.out.println(" next token values " + itr.nextToken());
    }

    String customerIdAsRowkey = null;
    for (int ii = 0; ii < hdfsDataArray.length; ii++) {
        if (ii == 0) {
            customerIdAsRowkey = hdfsDataArray[ii];
            continue;
        }
        if (tagInfoMap.containsKey(ii)) {
            tagInfoBean = tagInfoMap.get(ii);
            tagInfoBean.setTagNameValue_hdfs(hdfsDataArray[ii]);
            tagInfoBean.setCustomerIdRowKey_hfds(customerIdAsRowkey);
            hbaseStoreTagValueList.add(tagInfoBean);
        } else {
            System.out.format(" Tag Info Header key is not available for the column %d%n", ii);
        }
    }

    // Create / update the HBase table rows
    Configuration hbaseConfig = HBaseConfiguration.create();
    HBaseAdmin admin = new HBaseAdmin(hbaseConfig);

    for (TagInfoBean tagInfo : hbaseStoreTagValueList) {
        HTable hTable = new HTable(hbaseConfig, tagInfo.getTableName());
        Put p = new Put(Bytes.toBytes(tagInfo.getCustomerIdRowKey_hfds()));
        p.add(Bytes.toBytes(tagInfo.getColumnFamily()), Bytes.toBytes(tagInfo.getTagName()),
                Bytes.toBytes(tagInfo.getTagNameValue_hdfs()));
        hTable.put(p);
        System.out.println("HBase data inserted successfully ");
    }
}
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHFileOutputFormat.java
License:Apache License
@Override
public RecordWriter getHiveRecordWriter(final JobConf jc, final Path finalOutPath,
        Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
        final Progressable progressable) throws IOException {

    // Read configuration for the target path, first from jobconf, then from table properties
    String hfilePath = getFamilyPath(jc, tableProperties);
    if (hfilePath == null) {
        throw new RuntimeException("Please set " + HFILE_FAMILY_PATH + " to target location for HFiles");
    }

    // Target path's last component is also the column family name.
    final Path columnFamilyPath = new Path(hfilePath);
    final String columnFamilyName = columnFamilyPath.getName();
    final byte[] columnFamilyNameBytes = Bytes.toBytes(columnFamilyName);
    final Job job = new Job(jc);
    setCompressOutput(job, isCompressed);
    setOutputPath(job, finalOutPath);

    // Create the HFile writer
    final org.apache.hadoop.mapreduce.TaskAttemptContext tac = ShimLoader.getHadoopShims()
            .newTaskAttemptContext(job.getConfiguration(), progressable);

    final Path outputdir = FileOutputFormat.getOutputPath(tac);
    final org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable, KeyValue> fileWriter = getFileWriter(tac);

    // Individual columns are going to be pivoted to HBase cells,
    // and for each row, they need to be written out in order
    // of column name, so sort the column names now, creating a
    // mapping to their column position. However, the first
    // column is interpreted as the row key.
    String columnList = tableProperties.getProperty("columns");
    String[] columnArray = columnList.split(",");
    final SortedMap<byte[], Integer> columnMap = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
    int i = 0;
    for (String columnName : columnArray) {
        if (i != 0) {
            columnMap.put(Bytes.toBytes(columnName), i);
        }
        ++i;
    }

    return new RecordWriter() {
        @Override
        public void close(boolean abort) throws IOException {
            try {
                fileWriter.close(null);
                if (abort) {
                    return;
                }
                // Move the hfiles file(s) from the task output directory to the
                // location specified by the user.
                FileSystem fs = outputdir.getFileSystem(jc);
                fs.mkdirs(columnFamilyPath);
                Path srcDir = outputdir;
                for (;;) {
                    FileStatus[] files = fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER);
                    if ((files == null) || (files.length == 0)) {
                        throw new IOException("No family directories found in " + srcDir);
                    }
                    if (files.length != 1) {
                        throw new IOException("Multiple family directories found in " + srcDir);
                    }
                    srcDir = files[0].getPath();
                    if (srcDir.getName().equals(columnFamilyName)) {
                        break;
                    }
                }
                for (FileStatus regionFile : fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER)) {
                    fs.rename(regionFile.getPath(), new Path(columnFamilyPath, regionFile.getPath().getName()));
                }
                // Hive actually wants a file as task output (not a directory), so
                // replace the empty directory with an empty file to keep it happy.
                fs.delete(outputdir, true);
                fs.createNewFile(outputdir);
            } catch (InterruptedException ex) {
                throw new IOException(ex);
            }
        }

        private void writeText(Text text) throws IOException {
            // Decompose the incoming text row into fields.
            String s = text.toString();
            String[] fields = s.split("\u0001");
            assert (fields.length <= (columnMap.size() + 1));

            // First field is the row key.
            byte[] rowKeyBytes = Bytes.toBytes(fields[0]);

            // Remaining fields are cells addressed by column name within row.
            for (Map.Entry<byte[], Integer> entry : columnMap.entrySet()) {
                byte[] columnNameBytes = entry.getKey();
                int iColumn = entry.getValue();
                String val;
                if (iColumn >= fields.length) {
                    // trailing blank field
                    val = "";
                } else {
                    val = fields[iColumn];
                    if ("\\N".equals(val)) {
                        // omit nulls
                        continue;
                    }
                }
                byte[] valBytes = Bytes.toBytes(val);
                KeyValue kv = new KeyValue(rowKeyBytes, columnFamilyNameBytes, columnNameBytes, valBytes);
                try {
                    fileWriter.write(null, kv);
                } catch (IOException e) {
                    LOG.error("Failed while writing row: " + s);
                    throw e;
                } catch (InterruptedException ex) {
                    throw new IOException(ex);
                }
            }
        }

        private void writePut(PutWritable put) throws IOException {
            ImmutableBytesWritable row = new ImmutableBytesWritable(put.getPut().getRow());
            SortedMap<byte[], List<Cell>> cells = put.getPut().getFamilyCellMap();
            for (Map.Entry<byte[], List<Cell>> entry : cells.entrySet()) {
                Collections.sort(entry.getValue(), new CellComparator());
                for (Cell c : entry.getValue()) {
                    try {
                        fileWriter.write(row, KeyValueUtil.copyToNewKeyValue(c));
                    } catch (InterruptedException e) {
                        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
                    }
                }
            }
        }

        @Override
        public void write(Writable w) throws IOException {
            if (w instanceof Text) {
                writeText((Text) w);
            } else if (w instanceof PutWritable) {
                writePut((PutWritable) w);
            } else {
                throw new IOException("Unexpected writable " + w);
            }
        }
    };
}
From source file:com.github.joshelser.YcsbBatchScanner.java
License:Apache License
private List<Range> computeRanges() throws Exception {
    List<Text> rows = computeAllRows();
    log.info("Calculated all rows: Found {} rows", rows.size());
    Collections.shuffle(rows);
    log.info("Shuffled all rows");
    LinkedList<Range> ranges = new LinkedList<>();
    Random rand = new Random();
    for (Text row : rows.subList(0, this.numRanges)) {
        // For each row, pick a random column qualifier
        ranges.add(Range.exact(row.toString(), "ycsb", "field" + rand.nextInt(10)));
    }
    return ranges;
}
From source file:com.github.seqware.queryengine.plugins.contribs.DonorsToMutationsAndGenesAggregationPlugin.java
License:Open Source License
@Override
public void reduce(Text key, Iterable<Text> values, ReducerInterface<Text, Text> reducerInterface) {
    // key is feature set, value is mutation->gene that can just be cat'd
    Text newVal = new Text();
    StringBuilder newValSB = new StringBuilder();
    newValSB.append(key).append("\t");
    boolean first = true;
    for (Text val : values) {
        if (first) {
            first = false;
        } else {
            newValSB.append(";");
        }
        newValSB.append(val.toString());
    }
    newVal.set(newValSB.toString());
    reducerInterface.write(newVal, null);
}