Example usage for org.apache.hadoop.conf Configuration getValByRegex

List of usage examples for org.apache.hadoop.conf Configuration getValByRegex


On this page you can find example usages of org.apache.hadoop.conf.Configuration.getValByRegex.


public Map<String, String> getValByRegex(String regex) 

Source Link


Get keys matching the regex.


From source file:hydrograph.engine.hive.scheme.HivePartWrite.java

License:Apache License

/**
 * Example driver: parses the command-line arguments with {@code GenericOptionsParser}, prints the
 * remaining arguments, then defines and runs a flow from a comma-delimited {@code Hfs} source
 * ("/data/file.txt") to a {@code HivePartitionTap} sink for the table "testp14".
 *
 * NOTE(review): this listing appears to have lost lines during extraction — the closing braces of
 * the for-loop and of the method are absent, and no getValByRegex call is visible even though the
 * snippet is listed as a usage example for it. Confirm against the original source.
 *
 * @param args command-line arguments, handed to {@code GenericOptionsParser}
 * @throws IOException declared by the method; presumably raised by argument parsing — confirm
 */
public static void main(String args[]) throws IOException {
    Configuration conf = new Configuration();

    // Arguments left over once GenericOptionsParser has processed args against conf.
    String[] otherArgs;
    otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    // print other args
    String argsString = "";
    for (String arg : otherArgs) {
        argsString = argsString + " " + arg;
    // NOTE(review): the for-loop's closing brace seems to be missing before this line.
    System.out.println("After processing arguments are:" + argsString);

    Properties properties = new Properties();

    // Three String-typed fields named a, b, c.
    Fields fields = new Fields("a", "b", "c").applyTypes(String.class, String.class, String.class);

    // Source: comma-delimited text at /data/file.txt.
    Tap source = new Hfs(new TextDelimited(fields, ","), "/data/file.txt");

    // Table "testp14" with string columns a, b, c; the last array ({"a"}) is presumably the
    // partition key list — verify against HiveTableDescriptor.
    HiveTableDescriptor hiveTableDescriptor = new HiveTableDescriptor("testp14", new String[] { "a", "b", "c" },
            new String[] { "string", "string", "string" }, new String[] { "a" });
    HiveTap hivetap = new HiveTap(hiveTableDescriptor, new HiveParquetScheme(hiveTableDescriptor),
            SinkMode.REPLACE, false);
    // The sink wraps the Hive tap in a HivePartitionTap.
    Tap sink = new HivePartitionTap(hivetap);

    Pipe pipe = new Pipe("pipe");

    // Metastore URI is hard-coded in this example.
    properties.put("hive.metastore.uris", "thrift://UbuntuD5.bitwiseglobal.net:9083");
    FlowDef def = FlowDef.flowDef().addSource(pipe, source).addTailSink(pipe, sink);

    // Connect the flow definition using the properties above and run it to completion.
    new Hadoop2MR1FlowConnector(properties).connect(def).complete();


From source file:hydrograph.engine.hive.scheme.ParquetHiveTest.java

License:Apache License

/**
 * Example driver: parses the command-line arguments with {@code GenericOptionsParser}, prints the
 * remaining arguments, then defines and runs a flow from a comma-delimited {@code Hfs} source
 * ("data/output_testalltype") to a {@code HiveTap} sink for the table "testing23".
 *
 * NOTE(review): this listing appears to have lost lines during extraction — the for-loop and the
 * method have no closing braces, the {@code HiveTap} constructor call is cut off mid-argument
 * list, and no getValByRegex call is visible. Confirm against the original source.
 *
 * @param args command-line arguments, handed to {@code GenericOptionsParser}
 * @throws IOException declared by the method; presumably raised by argument parsing — confirm
 */
public static void main(String args[]) throws IOException {

    Configuration conf = new Configuration();
    // Arguments left over once GenericOptionsParser has processed args against conf.
    String[] otherArgs;
    otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    String argsString = "";
    for (String arg : otherArgs) {
        argsString = argsString + " " + arg;
    // NOTE(review): the for-loop's closing brace seems to be missing before this line.
    System.out.println("After processing arguments are:" + argsString);
    Properties properties = new Properties();
    // Date type built from the pattern "yyyy-MM-dd"; applied to field "e" below.
    DateType dt = new DateType("yyyy-MM-dd");
    // Six input fields a..f typed String, Integer, Long, BigDecimal, date, String.
    Fields fields = new Fields("a", "b", "c", "d", "e", "f").applyTypes(String.class, Integer.class, Long.class,
            BigDecimal.class, dt, String.class);
    Tap source = new Hfs(new TextDelimited(fields, true, ","), "data/output_testalltype");

    // Table "testing23": columns a..e plus "new", the latter declared as array<int>.
    HiveTableDescriptor hiveTableDescriptor = new HiveTableDescriptor("testing23",
            new String[] { "a", "b", "c", "d", "e", "new" },
            new String[] { "string", "int", "bigint", "decimal(10,2)", "date", "array<int>" });

    Tap sink = new HiveTap(hiveTableDescriptor, new HiveParquetScheme(hiveTableDescriptor), SinkMode.REPLACE,
    // NOTE(review): the remaining constructor argument(s) and the closing ");" are missing here.

    Pipe pipe = new Pipe("pipe");

    // Apply Custome1 (declaring the field "new") to every tuple and keep a, b, c, d, e, new.
    pipe = new Each(pipe, fields, new Custome1(new Fields("new")), new Fields("a", "b", "c", "d", "e", "new"));

    FlowDef def = FlowDef.flowDef().addSource(pipe, source).addTailSink(pipe, sink);

    // Connect the flow definition using the (empty, as listed) properties and run it to completion.
    new Hadoop2MR1FlowConnector(properties).connect(def).complete();


From source file:hydrograph.server.debug.lingual.LingualFilter.java

License:Apache License

/**
 * Returns a newly created {@link Properties} object.
 *
 * NOTE(review): as listed, the body never reads {@code conf} and the result is always empty; the
 * statements that populate it (and the method's closing brace) appear to have been lost during
 * extraction. Confirm against the original source.
 *
 * @param conf the Hadoop configuration (unused in the visible lines)
 * @return the {@code Properties} instance created by this method
 */
private Properties getProperties(Configuration conf) {
    Properties properties = new Properties();
    return properties;

From source file:io.svectors.hbase.cdc.config.KafkaConfiguration.java

License:Apache License

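/**
 * Filters the configuration for entries whose keys match {@code KAFKA_PREFIX_KEY}, removing the
 * first {@code KAFKA_PREFIX_KEY.length()} characters from each key.
 * @param configuration the configuration to filter
 * @return the matching entries, keyed by the shortened key
 */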
private Map<String, Object> filter(final Configuration configuration) {
    final Map<String, Object> kafkaProperties = configuration.getValByRegex(KAFKA_PREFIX_KEY).entrySet()
            .stream().collect(toMap(e -> e.getKey().substring(KAFKA_PREFIX_KEY.length()), e -> e.getValue()));
    return kafkaProperties;

From source file:org.apache.crunch.kafka.inputformat.KafkaInputFormat.java

License:Apache License

/**
 * Reads the {@code configuration} to determine which topics, partitions, and offsets should be used for reading data.
 *
 * @param configuration the configuration to derive the data to read.
 * @return a map of {@link TopicPartition} to a pair of start and end offsets.
 * @throws IllegalStateException if the {@code configuration} does not have the start and end offsets set properly
 *                               for a partition.
 */
public static Map<TopicPartition, Pair<Long, Long>> getOffsets(Configuration configuration) {
    Map<TopicPartition, Pair<Long, Long>> offsets = new HashMap<>();
    //find configuration for all of the topics with defined partitions
    Map<String, String> topicPartitionKeys = configuration.getValByRegex(TOPIC_KEY_REGEX);

    //for each topic start to process it's partitions
    for (String key : topicPartitionKeys.keySet()) {
        String topic = getTopicFromKey(key);
        int[] partitions = configuration.getInts(key);
        //for each partition find and add the start/end offset
        for (int partitionId : partitions) {
            TopicPartition topicPartition = new TopicPartition(topic, partitionId);
            long start = configuration.getLong(generatePartitionStartKey(topic, partitionId), Long.MIN_VALUE);
            long end = configuration.getLong(generatePartitionEndKey(topic, partitionId), Long.MIN_VALUE);

            if (start == Long.MIN_VALUE || end == Long.MIN_VALUE) {
                throw new IllegalStateException("The " + topicPartition + "has an invalid start:" + start
                        + " or end:" + end + " offset configured.");

            offsets.put(topicPartition, Pair.of(start, end));

    return offsets;

From source file:org.apache.crunch.kafka.record.KafkaInputFormat.java

License:Apache License

/**
 * Reads the {@code configuration} to determine which topics, partitions, and offsets should be used for reading data.
 *
 * @param configuration the configuration to derive the data to read.
 * @return a map of {@link TopicPartition} to a pair of start and end offsets.
 * @throws IllegalStateException if the {@code configuration} does not have the start and end offsets set properly
 *                               for a partition.
 */
public static Map<TopicPartition, Pair<Long, Long>> getOffsets(Configuration configuration) {
    Map<TopicPartition, Pair<Long, Long>> offsets = new HashMap<>();
    //find configuration for all of the topics with defined partitions
    Map<String, String> topicPartitionKeys = configuration.getValByRegex(TOPIC_KEY_REGEX);

    //for each topic start to process it's partitions
    for (String key : topicPartitionKeys.keySet()) {
        String topic = getTopicFromKey(key);
        int[] partitions = configuration.getInts(key);
        //for each partition find and add the start/end offset
        for (int partitionId : partitions) {
            TopicPartition topicPartition = new TopicPartition(topic, partitionId);
            long start = configuration.getLong(generatePartitionStartKey(topic, partitionId), Long.MIN_VALUE);
            long end = configuration.getLong(generatePartitionEndKey(topic, partitionId), Long.MIN_VALUE);

            if (start == Long.MIN_VALUE || end == Long.MIN_VALUE) {
                throw new IllegalStateException("The " + topicPartition + " has an invalid start:" + start
                        + " or end:" + end + " offset configured.");

            offsets.put(topicPartition, Pair.of(start, end));

    return offsets;

From source file:org.apache.hive.storage.jdbc.dao.GenericJdbcDatabaseAccessor.java

License:Apache License

protected Properties getConnectionPoolProperties(Configuration conf) throws Exception {
    // Create the default properties object
    Properties dbProperties = getDefaultDBCPProperties();

    // override with user defined properties
    Map<String, String> userProperties = conf.getValByRegex(DBCP_CONFIG_PREFIX + "\\.*");
    if ((userProperties != null) && (!userProperties.isEmpty())) {
        for (Entry<String, String> entry : userProperties.entrySet()) {
            dbProperties.put(entry.getKey().replaceFirst(DBCP_CONFIG_PREFIX + "\\.", ""), entry.getValue());
        }/* w  ww  . j a v  a  2s.  c om*/

    // handle password
    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    if (credentials.getSecretKey(DBCP_PWD) != null) {
        LOGGER.info("found token in credentials");
        dbProperties.put(DBCP_PWD, new String(credentials.getSecretKey(DBCP_PWD)));

    // essential properties that shouldn't be overridden by users
    dbProperties.put("url", conf.get(JdbcStorageConfig.JDBC_URL.getPropertyName()));
    dbProperties.put("driverClassName", conf.get(JdbcStorageConfig.JDBC_DRIVER_CLASS.getPropertyName()));
    dbProperties.put("type", "javax.sql.DataSource");
    return dbProperties;

From source file:org.apache.lens.driver.hive.HiveDriver.java

License:Apache License

/**
 * Executes the selected driver query of {@code ctx} through the Hive client, records the
 * resulting operation handle, and returns the result set built by {@code createResultSet}.
 *
 * NOTE(review): this listing has lost lines during extraction — the {@code executeStatement}
 * call is cut off mid-argument list, several {@code if} / {@code finally} bodies and closing
 * braces are absent, and no getValByRegex call is visible. Confirm against the original source.
 *
 * @param ctx the query context supplying the driver configuration, query handle and query text
 * @return the value of {@code result}, which is assigned from {@code createResultSet(ctx, true)}
 * @throws LensException if the operation status is ERROR, or wrapping an {@code IOException}
 */
public LensResultSet execute(QueryContext ctx) throws LensException {
    OperationHandle op = null;
    LensResultSet result = null;
    try {
        Configuration qdconf = ctx.getDriverConf(this);
        // Set mapred.job.name to the string form of the query handle.
        qdconf.set("mapred.job.name", ctx.getQueryHandle().toString());
        SessionHandle sessionHandle = getSession(ctx);
        op = getClient().executeStatement(sessionHandle, ctx.getSelectedDriverQuery(),
        // NOTE(review): the remaining argument(s) of executeStatement are missing from this listing.
        log.info("The hive operation handle: {}", op);
        // Remember which Hive operation and session belong to this query.
        hiveHandles.put(ctx.getQueryHandle(), op);
        opHandleToSession.put(op, sessionHandle);
        OperationStatus status = getClient().getOperationStatus(op);
        if (status.getState() == OperationState.ERROR) {
            throw new LensException("Unknown error while running query " + ctx.getUserQuery());
        result = createResultSet(ctx, true);
        // close the query immediately if the result is not inmemory result set
        if (result == null || !(result instanceof InMemoryResultSet)) {
        // remove query handle from hiveHandles even in case of inmemory result set
    } catch (IOException e) {
        throw new LensException("Error adding persistent path", e);
    } catch (HiveSQLException hiveErr) {
        handleHiveServerError(ctx, hiveErr);
    } finally {
        // NOTE(review): the body of this null check is missing from this listing.
        if (null != op) {
    return result;

From source file:org.apache.lens.driver.hive.HiveDriver.java

License:Apache License

/**
 * Submits the selected driver query of {@code ctx} through the Hive client's
 * {@code executeStatementAsync} and records the resulting operation handle.
 *
 * NOTE(review): this listing has lost lines during extraction — the
 * {@code executeStatementAsync} call is cut off mid-argument list, closing braces are absent,
 * and no getValByRegex call is visible. Confirm against the original source.
 *
 * @param ctx the query context supplying the driver configuration, query handle and query text
 * @throws LensException wrapping an {@code IOException} raised inside the try block
 */
public void executeAsync(QueryContext ctx) throws LensException {
    try {
        Configuration qdconf = ctx.getDriverConf(this);
        // Set mapred.job.name to the string form of the query handle.
        qdconf.set("mapred.job.name", ctx.getQueryHandle().toString());
        SessionHandle sessionHandle = getSession(ctx);
        OperationHandle op = getClient().executeStatementAsync(sessionHandle, ctx.getSelectedDriverQuery(),
        // NOTE(review): the remaining argument(s) of executeStatementAsync are missing from this listing.
        log.info("QueryHandle: {} HiveHandle:{}", ctx.getQueryHandle(), op);
        // Remember which Hive operation and session belong to this query.
        hiveHandles.put(ctx.getQueryHandle(), op);
        opHandleToSession.put(op, sessionHandle);
    } catch (IOException e) {
        throw new LensException("Error adding persistent path", e);
    } catch (HiveSQLException e) {
        handleHiveServerError(ctx, e);

From source file:org.apache.lens.server.session.LensSessionImpl.java

License:Apache License

public static Map<String, String> getHiveSessionConf() {
    Configuration defaultConf = createDefaultConf();
    return defaultConf.getValByRegex("hive.*");