List of usage examples for org.apache.hadoop.conf.Configuration.get

public String get(String name)

Returns the value of the name property, or null if no such property exists.

From source file:co.cask.cdap.hive.stream.HiveStreamInputFormat.java
License:Apache License
/**
 * Sets up the given {@link StreamInputSplitFinder.Builder} by analyzing the query.
 */
private StreamInputSplitFinder.Builder setupBuilder(Configuration conf, StreamConfig streamConfig,
                                                    StreamInputSplitFinder.Builder builder) {
    // the conf contains a 'hive.io.filter.expr.serialized' key which contains the serialized form of ExprNodeDesc
    long startTime = Math.max(0L, System.currentTimeMillis() - streamConfig.getTTL());
    long endTime = System.currentTimeMillis();

    String serializedExpr = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
    if (serializedExpr == null) {
        return builder.setStartTime(startTime).setEndTime(endTime);
    }

    try {
        ExprNodeGenericFuncDesc expr;
        // Hack to deal with the fact that older versions of Hive use
        // Utilities.deserializeExpression(String, Configuration),
        // whereas newer versions use Utilities.deserializeExpression(String).
        try {
            expr = Utilities.deserializeExpression(serializedExpr);
        } catch (NoSuchMethodError e) {
            expr = (ExprNodeGenericFuncDesc) Utilities.class
                .getMethod("deserializeExpression", String.class, Configuration.class)
                .invoke(null, serializedExpr, conf);
        }

        // Analyze the query to extract predicates that can be used for indexing (i.e. setting start/end time)
        IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer();
        for (CompareOp op : CompareOp.values()) {
            analyzer.addComparisonOp(op.getOpClassName());
        }

        // Stream can only be indexed by timestamp
        analyzer.clearAllowedColumnNames();
        analyzer.allowColumnName("ts");

        List<IndexSearchCondition> conditions = Lists.newArrayList();
        analyzer.analyzePredicate(expr, conditions);

        for (IndexSearchCondition condition : conditions) {
            CompareOp op = CompareOp.from(condition.getComparisonOp());
            if (op == null) {
                // Not a supported operation
                continue;
            }
            ExprNodeConstantDesc value = condition.getConstantDesc();
            if (value == null || !(value.getValue() instanceof Long)) {
                // Not a supported value
                continue;
            }

            long timestamp = (Long) value.getValue();
            // If there is an equal, set both start and end time and there is no need to inspect further
            if (op == CompareOp.EQUAL) {
                startTime = timestamp;
                endTime = (timestamp < Long.MAX_VALUE) ? timestamp + 1L : timestamp;
                break;
            }
            if (op == CompareOp.GREATER || op == CompareOp.EQUAL_OR_GREATER) {
                // Plus 1 for the start time if it is greater since start time is inclusive in stream
                startTime = Math.max(startTime,
                    timestamp + (timestamp < Long.MAX_VALUE && op == CompareOp.GREATER ? 1L : 0L));
            } else {
                // Plus 1 for end time if it is equal or less since end time is exclusive in stream
                endTime = Math.min(endTime,
                    timestamp + (timestamp < Long.MAX_VALUE && op == CompareOp.EQUAL_OR_LESS ? 1L : 0L));
            }
        }
    } catch (Throwable t) {
        LOG.warn("Exception analyzing query predicate. A full table scan will be performed.", t);
    }

    return builder.setStartTime(startTime).setEndTime(endTime);
}
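The example above retrieves the serialized filter expression with conf.get(TableScanDesc.FILTER_EXPR_CONF_STR) and falls back to the full time range when the key is absent. A minimal standalone sketch of that get-or-null pattern, using a hypothetical property name ("my.app.filter") rather than anything from the CDAP or Hive code above:

import org.apache.hadoop.conf.Configuration;

public class ConfigurationGetExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("my.app.filter", "ts > 1000");   // hypothetical property, for illustration only

        // get(String) returns the stored value, or null if the property was never set
        String filter = conf.get("my.app.filter");     // "ts > 1000"
        String missing = conf.get("my.app.not.set");   // null

        if (missing == null) {
            // callers typically branch on null, as the Hive example above does
            System.out.println("property not set, using defaults");
        }
        System.out.println("filter = " + filter);
    }
}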
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.AbstractBatchReadableInputFormat.java
License:Apache License
@Override
public RecordReader<KEY, VALUE> createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
    DataSetInputSplit inputSplit = (DataSetInputSplit) split;

    Configuration conf = context.getConfiguration();
    String datasetName = conf.get(DATASET_NAME);
    Map<String, String> datasetArgs = GSON.fromJson(conf.get(DATASET_ARGS), DATASET_ARGS_TYPE);

    @SuppressWarnings("unchecked")
    BatchReadable<KEY, VALUE> batchReadable = createBatchReadable(context, datasetName, datasetArgs);
    SplitReader<KEY, VALUE> splitReader = batchReadable.createSplitReader(inputSplit.getSplit());
    return new SplitReaderRecordReader<>(splitReader);
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.AbstractBatchWritableOutputFormat.java
License:Apache License
@Override
public RecordWriter<KEY, VALUE> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    String datasetName = conf.get(DATASET_NAME);
    Map<String, String> datasetArgs = GSON.fromJson(conf.get(DATASET_ARGS), DATASET_ARGS_TYPE);
    return new BatchWritableRecordWriter<>(createBatchWritable(context, datasetName, datasetArgs));
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.AbstractBatchWritableOutputFormat.java
License:Apache License
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
    Configuration hConf = context.getConfiguration();
    if (hConf.get(DATASET_NAME) == null || hConf.get(DATASET_ARGS) == null) {
        throw new IOException("Dataset configurations are missing in the job configuration");
    }
}
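The input- and output-format examples above all assume that DATASET_NAME and DATASET_ARGS were placed in the job configuration before submission. A short sketch of how a submitting job might populate such keys, using hypothetical key names and a plain Gson-serialized map rather than the actual CDAP constants or helpers:

import com.google.gson.Gson;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class JobConfSetup {
    // hypothetical key names for illustration; the real constants live in the CDAP classes above
    private static final String DATASET_NAME = "example.dataset.name";
    private static final String DATASET_ARGS = "example.dataset.args";

    public static Job configureJob() throws Exception {
        Job job = Job.getInstance(new Configuration(), "example-job");
        Configuration conf = job.getConfiguration();

        Map<String, String> datasetArgs = new HashMap<>();
        datasetArgs.put("ttl", "86400");

        // store the dataset name directly and the argument map as JSON,
        // mirroring what the conf.get(...) calls in the formats above read back
        conf.set(DATASET_NAME, "purchases");
        conf.set(DATASET_ARGS, new Gson().toJson(datasetArgs));
        return job;
    }
}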
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.DataSetInputFormat.java
License:Apache License
private String getInputName(Configuration conf) {
    return conf.get(HCONF_ATTR_INPUT_DATASET);
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.DataSetOutputFormat.java
License:Apache License
private String getOutputDataSet(Configuration conf) {
    return conf.get(HCONF_ATTR_OUTPUT_DATASET);
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.MultipleInputs.java
License:Apache License
/**
 * @param conf the Configuration from which to deserialize the input configurations
 * @return a mapping from input name to the MapperInput for that input
 */
public static Map<String, MapperInput> getInputMap(Configuration conf) {
    String mapString = conf.get(INPUT_CONFIGS);
    if (mapString == null) {
        return new HashMap<>();
    }
    return GSON.fromJson(mapString, STRING_MAPPER_INPUT_MAP_TYPE);
}
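getInputMap above substitutes an empty map when the property is missing. For simple string properties, Configuration also offers a get(String name, String defaultValue) overload that folds the null check into the lookup; a minimal sketch with a hypothetical property name:

import org.apache.hadoop.conf.Configuration;

public class GetWithDefaultExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // the property was never set, so the supplied default is returned instead of null
        String codec = conf.get("example.output.codec", "snappy");   // hypothetical key
        System.out.println("codec = " + codec);                      // prints "codec = snappy"
    }
}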
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitionerWriterWrapper.java
License:Apache License
DynamicPartitionerWriterWrapper(TaskAttemptContext job) {
    this.job = job;
    this.outputName = DynamicPartitioningOutputFormat.getOutputName(job);

    Configuration configuration = job.getConfiguration();
    Class<? extends DynamicPartitioner> partitionerClass = configuration.getClass(
        PartitionedFileSetArguments.DYNAMIC_PARTITIONER_CLASS_NAME, null, DynamicPartitioner.class);
    this.dynamicPartitioner = new InstantiatorFactory(false).get(TypeToken.of(partitionerClass)).create();

    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
    this.taskContext = classLoader.getTaskContextProvider().get(job);

    String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
    PartitionedFileSet outputDataset = taskContext.getDataset(outputDatasetName);
    this.partitioning = outputDataset.getPartitioning();

    this.dynamicPartitioner.initialize(taskContext);
}
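Besides the plain string lookup, the constructor above uses Configuration.getClass(name, defaultValue, xface) to resolve a class name stored in the configuration. A small sketch of that pattern with a hypothetical plug-in interface and key name, not the CDAP classes above:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ReflectionUtils;

public class GetClassExample {
    /** Hypothetical plug-in interface used only for this illustration. */
    public interface Formatter {
        String format(String input);
    }

    public static class UpperCaseFormatter implements Formatter {
        @Override
        public String format(String input) {
            return input.toUpperCase();
        }
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // register the implementation class under a hypothetical key
        conf.setClass("example.formatter.class", UpperCaseFormatter.class, Formatter.class);

        // getClass(name, defaultValue, xface) returns the configured class, checked against the interface
        Class<? extends Formatter> cls =
            conf.getClass("example.formatter.class", UpperCaseFormatter.class, Formatter.class);
        Formatter formatter = ReflectionUtils.newInstance(cls, conf);
        System.out.println(formatter.format("hello"));   // prints "HELLO"
    }
}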
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java
License:Apache License
@Override
public void commitJob(JobContext context) throws IOException {
    Configuration configuration = context.getConfiguration();
    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
    BasicMapReduceTaskContext taskContext = classLoader.getTaskContextProvider().get(this.taskContext);

    String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
    PartitionedFileSet outputDataset = taskContext.getDataset(outputDatasetName);
    Partitioning partitioning = outputDataset.getPartitioning();

    Set<PartitionKey> partitionsToAdd = new HashSet<>();
    Set<String> relativePaths = new HashSet<>();
    // Go over all files in the temporary directory and keep track of partitions to add for them
    FileStatus[] allCommittedTaskPaths = getAllCommittedTaskPaths(context);
    for (FileStatus committedTaskPath : allCommittedTaskPaths) {
        FileSystem fs = committedTaskPath.getPath().getFileSystem(configuration);
        RemoteIterator<LocatedFileStatus> fileIter = fs.listFiles(committedTaskPath.getPath(), true);

        while (fileIter.hasNext()) {
            Path path = fileIter.next().getPath();
            String relativePath = getRelative(committedTaskPath.getPath(), path);

            int lastPathSepIdx = relativePath.lastIndexOf(Path.SEPARATOR);
            if (lastPathSepIdx == -1) {
                // this shouldn't happen because each relative path should consist of at least one partition key and
                // the output file name
                LOG.warn("Skipping path '{}'. Its relative path '{}' has fewer than two parts", path, relativePath);
                continue;
            }
            // relativePath = "../key1/key2/part-m-00000"
            // relativeDir = "../key1/key2"
            // fileName = "part-m-00000"
            String relativeDir = relativePath.substring(0, lastPathSepIdx);
            String fileName = relativePath.substring(lastPathSepIdx + 1);

            Path finalDir = new Path(FileOutputFormat.getOutputPath(context), relativeDir);
            Path finalPath = new Path(finalDir, fileName);
            if (fs.exists(finalPath)) {
                throw new FileAlreadyExistsException("Final output path " + finalPath + " already exists");
            }
            PartitionKey partitionKey = getPartitionKey(partitioning, relativeDir);
            partitionsToAdd.add(partitionKey);
            relativePaths.add(relativeDir);
        }
    }

    // We need to copy to the parent of the FileOutputFormat's outputDir, since we added a _temporary_jobId suffix to
    // the original outputDir.
    Path finalOutput = FileOutputFormat.getOutputPath(context);
    FileSystem fs = finalOutput.getFileSystem(configuration);
    for (FileStatus stat : getAllCommittedTaskPaths(context)) {
        mergePaths(fs, stat, finalOutput);
    }

    // compute the metadata to be written to every output partition
    Map<String, String> metadata = ConfigurationUtil.getNamedConfigurations(this.taskContext.getConfiguration(),
        PartitionedFileSetArguments.OUTPUT_PARTITION_METADATA_PREFIX);

    // create all the necessary partitions
    for (PartitionKey partitionKey : partitionsToAdd) {
        PartitionOutput partitionOutput = outputDataset.getPartitionOutput(partitionKey);
        partitionOutput.setMetadata(metadata);
        partitionOutput.addPartition();
    }

    // close the TaskContext, which flushes dataset operations
    try {
        taskContext.flushOperations();
    } catch (Exception e) {
        Throwables.propagateIfPossible(e, IOException.class);
        throw new IOException(e);
    }

    // delete the job-specific _temporary folder and create a _done file in the output folder
    cleanupJob(context);

    // mark all the final output paths with a _SUCCESS file, if configured to do so (default = true)
    if (configuration.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, true)) {
        for (String relativePath : relativePaths) {
            Path pathToMark = new Path(finalOutput, relativePath);
            Path markerPath = new Path(pathToMark, SUCCEEDED_FILE_NAME);
            fs.createNewFile(markerPath);
        }
    }
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputFormat.java
License:Apache License
/**
 * Create a composite record writer that can write key/value data to different output files.
 *
 * @return a composite record writer
 * @throws IOException
 */
@Override
public RecordWriter<K, V> getRecordWriter(final TaskAttemptContext job) throws IOException {
    final String outputName = FileOutputFormat.getOutputName(job);

    Configuration configuration = job.getConfiguration();
    Class<? extends DynamicPartitioner> partitionerClass = configuration.getClass(
        PartitionedFileSetArguments.DYNAMIC_PARTITIONER_CLASS_NAME, null, DynamicPartitioner.class);

    @SuppressWarnings("unchecked")
    final DynamicPartitioner<K, V> dynamicPartitioner = new InstantiatorFactory(false)
        .get(TypeToken.of(partitionerClass)).create();

    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
    final BasicMapReduceTaskContext<K, V> taskContext = classLoader.getTaskContextProvider().get(job);

    String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
    PartitionedFileSet outputDataset = taskContext.getDataset(outputDatasetName);
    final Partitioning partitioning = outputDataset.getPartitioning();

    dynamicPartitioner.initialize(taskContext);

    return new RecordWriter<K, V>() {

        // a cache storing the record writers for different output files.
        Map<PartitionKey, RecordWriter<K, V>> recordWriters = new HashMap<>();

        public void write(K key, V value) throws IOException, InterruptedException {
            PartitionKey partitionKey = dynamicPartitioner.getPartitionKey(key, value);
            RecordWriter<K, V> rw = this.recordWriters.get(partitionKey);
            if (rw == null) {
                String relativePath = PartitionedFileSetDataset.getOutputPath(partitionKey, partitioning);
                String finalPath = relativePath + "/" + outputName;

                // if we don't have the record writer yet for the final path, create one and add it to the cache
                rw = getBaseRecordWriter(getTaskAttemptContext(job, finalPath));
                this.recordWriters.put(partitionKey, rw);
            }
            rw.write(key, value);
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            try {
                List<RecordWriter<?, ?>> recordWriters = new ArrayList<>();
                recordWriters.addAll(this.recordWriters.values());
                MultipleOutputs.closeRecordWriters(recordWriters, context);

                taskContext.flushOperations();
            } catch (Exception e) {
                throw new IOException(e);
            } finally {
                dynamicPartitioner.destroy();
            }
        }
    };
}