List of usage examples for org.apache.hadoop.conf.Configuration.get

public String get(String name)

Returns the value of the name property, or null if no such property exists.

From source file:co.cask.cdap.hive.stream.HiveStreamInputFormat.java
License:Apache License
/**
 * Sets up the given {@link StreamInputSplitFinder.Builder} by analyzing the query.
 */
private StreamInputSplitFinder.Builder setupBuilder(Configuration conf, StreamConfig streamConfig,
                                                    StreamInputSplitFinder.Builder builder) {
    // the conf contains a 'hive.io.filter.expr.serialized' key which contains the serialized form of ExprNodeDesc
    long startTime = Math.max(0L, System.currentTimeMillis() - streamConfig.getTTL());
    long endTime = System.currentTimeMillis();

    String serializedExpr = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
    if (serializedExpr == null) {
        return builder.setStartTime(startTime).setEndTime(endTime);
    }

    try {
        ExprNodeGenericFuncDesc expr;
        // Hack to deal with the fact that older versions of Hive use
        // Utilities.deserializeExpression(String, Configuration),
        // whereas newer versions use Utilities.deserializeExpression(String).
        try {
            expr = Utilities.deserializeExpression(serializedExpr);
        } catch (NoSuchMethodError e) {
            expr = (ExprNodeGenericFuncDesc) Utilities.class
                .getMethod("deserializeExpression", String.class, Configuration.class)
                .invoke(null, serializedExpr, conf);
        }

        // Analyze the query to extract predicates that can be used for indexing (i.e. setting start/end time)
        IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer();
        for (CompareOp op : CompareOp.values()) {
            analyzer.addComparisonOp(op.getOpClassName());
        }

        // Stream can only be indexed by timestamp
        analyzer.clearAllowedColumnNames();
        analyzer.allowColumnName("ts");

        List<IndexSearchCondition> conditions = Lists.newArrayList();
        analyzer.analyzePredicate(expr, conditions);

        for (IndexSearchCondition condition : conditions) {
            CompareOp op = CompareOp.from(condition.getComparisonOp());
            if (op == null) {
                // Not a supported operation
                continue;
            }
            ExprNodeConstantDesc value = condition.getConstantDesc();
            if (value == null || !(value.getValue() instanceof Long)) {
                // Not a supported value
                continue;
            }

            long timestamp = (Long) value.getValue();
            // If there is an equal, set both start and end time and there is no need to inspect further
            if (op == CompareOp.EQUAL) {
                startTime = timestamp;
                endTime = (timestamp < Long.MAX_VALUE) ? timestamp + 1L : timestamp;
                break;
            }
            if (op == CompareOp.GREATER || op == CompareOp.EQUAL_OR_GREATER) {
                // Plus 1 for the start time if it is greater since start time is inclusive in stream
                startTime = Math.max(startTime,
                    timestamp + (timestamp < Long.MAX_VALUE && op == CompareOp.GREATER ? 1L : 0L));
            } else {
                // Plus 1 for end time if it is equal or less since end time is exclusive in stream
                endTime = Math.min(endTime,
                    timestamp + (timestamp < Long.MAX_VALUE && op == CompareOp.EQUAL_OR_LESS ? 1L : 0L));
            }
        }
    } catch (Throwable t) {
        LOG.warn("Exception analyzing query predicate. A full table scan will be performed.", t);
    }

    return builder.setStartTime(startTime).setEndTime(endTime);
}
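The example above retrieves the serialized filter expression with conf.get(TableScanDesc.FILTER_EXPR_CONF_STR) and falls back to the full time range when the key is absent. A minimal standalone sketch of that get-or-null pattern, using a hypothetical property name ("my.app.filter") rather than anything from the CDAP or Hive code above:

import org.apache.hadoop.conf.Configuration;

public class ConfigurationGetExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("my.app.filter", "ts > 1000");   // hypothetical property, for illustration only

        // get(String) returns the stored value, or null if the property was never set
        String filter = conf.get("my.app.filter");     // "ts > 1000"
        String missing = conf.get("my.app.not.set");   // null

        if (missing == null) {
            // callers typically branch on null, as the Hive example above does
            System.out.println("property not set, using defaults");
        }
        System.out.println("filter = " + filter);
    }
}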
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.AbstractBatchReadableInputFormat.java
License:Apache License
@Override
public RecordReader<KEY, VALUE> createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
    DataSetInputSplit inputSplit = (DataSetInputSplit) split;

    Configuration conf = context.getConfiguration();
    String datasetName = conf.get(DATASET_NAME);
    Map<String, String> datasetArgs = GSON.fromJson(conf.get(DATASET_ARGS), DATASET_ARGS_TYPE);

    @SuppressWarnings("unchecked")
    BatchReadable<KEY, VALUE> batchReadable = createBatchReadable(context, datasetName, datasetArgs);
    SplitReader<KEY, VALUE> splitReader = batchReadable.createSplitReader(inputSplit.getSplit());
    return new SplitReaderRecordReader<>(splitReader);
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.AbstractBatchWritableOutputFormat.java
License:Apache License
@Override
public RecordWriter<KEY, VALUE> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    String datasetName = conf.get(DATASET_NAME);
    Map<String, String> datasetArgs = GSON.fromJson(conf.get(DATASET_ARGS), DATASET_ARGS_TYPE);
    return new BatchWritableRecordWriter<>(createBatchWritable(context, datasetName, datasetArgs));
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.AbstractBatchWritableOutputFormat.java
License:Apache License
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
    Configuration hConf = context.getConfiguration();
    if (hConf.get(DATASET_NAME) == null || hConf.get(DATASET_ARGS) == null) {
        throw new IOException("Dataset configurations are missing in the job configuration");
    }
}
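The input- and output-format examples above all assume that DATASET_NAME and DATASET_ARGS were placed in the job configuration before submission. A short sketch of how a submitting job might populate such keys, using hypothetical key names and a plain Gson-serialized map rather than the actual CDAP constants or helpers:

import com.google.gson.Gson;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class JobConfSetup {
    // hypothetical key names for illustration; the real constants live in the CDAP classes above
    private static final String DATASET_NAME = "example.dataset.name";
    private static final String DATASET_ARGS = "example.dataset.args";

    public static Job configureJob() throws Exception {
        Job job = Job.getInstance(new Configuration(), "example-job");
        Configuration conf = job.getConfiguration();

        Map<String, String> datasetArgs = new HashMap<>();
        datasetArgs.put("ttl", "86400");

        // store the dataset name directly and the argument map as JSON,
        // mirroring what the conf.get(...) calls in the formats above read back
        conf.set(DATASET_NAME, "purchases");
        conf.set(DATASET_ARGS, new Gson().toJson(datasetArgs));
        return job;
    }
}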
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.DataSetInputFormat.java
License:Apache License
private String getInputName(Configuration conf) {
    return conf.get(HCONF_ATTR_INPUT_DATASET);
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.DataSetOutputFormat.java
License:Apache License
private String getOutputDataSet(Configuration conf) {
    return conf.get(HCONF_ATTR_OUTPUT_DATASET);
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.MultipleInputs.java
License:Apache License
/**
 * @param conf the Configuration from which to deserialize the input configurations
 * @return a mapping from input name to the MapperInput for that input
 */
public static Map<String, MapperInput> getInputMap(Configuration conf) {
    String mapString = conf.get(INPUT_CONFIGS);
    if (mapString == null) {
        return new HashMap<>();
    }
    return GSON.fromJson(mapString, STRING_MAPPER_INPUT_MAP_TYPE);
}
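getInputMap above substitutes an empty map when the property is missing. For simple string properties, Configuration also offers a get(String name, String defaultValue) overload that folds the null check into the lookup; a minimal sketch with a hypothetical property name:

import org.apache.hadoop.conf.Configuration;

public class GetWithDefaultExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // the property was never set, so the supplied default is returned instead of null
        String codec = conf.get("example.output.codec", "snappy");   // hypothetical key
        System.out.println("codec = " + codec);                      // prints "codec = snappy"
    }
}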
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitionerWriterWrapper.java
License:Apache License
DynamicPartitionerWriterWrapper(TaskAttemptContext job) {
    this.job = job;
    this.outputName = DynamicPartitioningOutputFormat.getOutputName(job);

    Configuration configuration = job.getConfiguration();
    Class<? extends DynamicPartitioner> partitionerClass = configuration.getClass(
        PartitionedFileSetArguments.DYNAMIC_PARTITIONER_CLASS_NAME, null, DynamicPartitioner.class);
    this.dynamicPartitioner = new InstantiatorFactory(false).get(TypeToken.of(partitionerClass)).create();

    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
    this.taskContext = classLoader.getTaskContextProvider().get(job);

    String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
    PartitionedFileSet outputDataset = taskContext.getDataset(outputDatasetName);
    this.partitioning = outputDataset.getPartitioning();

    this.dynamicPartitioner.initialize(taskContext);
}
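Besides the plain string lookup, the constructor above uses Configuration.getClass(name, defaultValue, xface) to resolve a class name stored in the configuration. A small sketch of that pattern with a hypothetical plug-in interface and key name, not the CDAP classes above:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ReflectionUtils;

public class GetClassExample {
    /** Hypothetical plug-in interface used only for this illustration. */
    public interface Formatter {
        String format(String input);
    }

    public static class UpperCaseFormatter implements Formatter {
        @Override
        public String format(String input) {
            return input.toUpperCase();
        }
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // register the implementation class under a hypothetical key
        conf.setClass("example.formatter.class", UpperCaseFormatter.class, Formatter.class);

        // getClass(name, defaultValue, xface) returns the configured class, checked against the interface
        Class<? extends Formatter> cls =
            conf.getClass("example.formatter.class", UpperCaseFormatter.class, Formatter.class);
        Formatter formatter = ReflectionUtils.newInstance(cls, conf);
        System.out.println(formatter.format("hello"));   // prints "HELLO"
    }
}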
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java
License:Apache License
@Override
public void commitJob(JobContext context) throws IOException {
    Configuration configuration = context.getConfiguration();
    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
    BasicMapReduceTaskContext taskContext = classLoader.getTaskContextProvider().get(this.taskContext);

    String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
    PartitionedFileSet outputDataset = taskContext.getDataset(outputDatasetName);
    Partitioning partitioning = outputDataset.getPartitioning();

    Set<PartitionKey> partitionsToAdd = new HashSet<>();
    Set<String> relativePaths = new HashSet<>();
    // Go over all files in the temporary directory and keep track of partitions to add for them
    FileStatus[] allCommittedTaskPaths = getAllCommittedTaskPaths(context);
    for (FileStatus committedTaskPath : allCommittedTaskPaths) {
        FileSystem fs = committedTaskPath.getPath().getFileSystem(configuration);
        RemoteIterator<LocatedFileStatus> fileIter = fs.listFiles(committedTaskPath.getPath(), true);

        while (fileIter.hasNext()) {
            Path path = fileIter.next().getPath();
            String relativePath = getRelative(committedTaskPath.getPath(), path);

            int lastPathSepIdx = relativePath.lastIndexOf(Path.SEPARATOR);
            if (lastPathSepIdx == -1) {
                // this shouldn't happen because each relative path should consist of at least one partition key and
                // the output file name
                LOG.warn("Skipping path '{}'. Its relative path '{}' has fewer than two parts", path, relativePath);
                continue;
            }
            // relativePath = "../key1/key2/part-m-00000"
            // relativeDir = "../key1/key2"
            // fileName = "part-m-00000"
            String relativeDir = relativePath.substring(0, lastPathSepIdx);
            String fileName = relativePath.substring(lastPathSepIdx + 1);

            Path finalDir = new Path(FileOutputFormat.getOutputPath(context), relativeDir);
            Path finalPath = new Path(finalDir, fileName);
            if (fs.exists(finalPath)) {
                throw new FileAlreadyExistsException("Final output path " + finalPath + " already exists");
            }
            PartitionKey partitionKey = getPartitionKey(partitioning, relativeDir);
            partitionsToAdd.add(partitionKey);
            relativePaths.add(relativeDir);
        }
    }

    // We need to copy to the parent of the FileOutputFormat's outputDir, since we added a _temporary_jobId suffix to
    // the original outputDir.
    Path finalOutput = FileOutputFormat.getOutputPath(context);
    FileSystem fs = finalOutput.getFileSystem(configuration);
    for (FileStatus stat : getAllCommittedTaskPaths(context)) {
        mergePaths(fs, stat, finalOutput);
    }

    // compute the metadata to be written to every output partition
    Map<String, String> metadata = ConfigurationUtil.getNamedConfigurations(this.taskContext.getConfiguration(),
        PartitionedFileSetArguments.OUTPUT_PARTITION_METADATA_PREFIX);

    // create all the necessary partitions
    for (PartitionKey partitionKey : partitionsToAdd) {
        PartitionOutput partitionOutput = outputDataset.getPartitionOutput(partitionKey);
        partitionOutput.setMetadata(metadata);
        partitionOutput.addPartition();
    }

    // close the TaskContext, which flushes dataset operations
    try {
        taskContext.flushOperations();
    } catch (Exception e) {
        Throwables.propagateIfPossible(e, IOException.class);
        throw new IOException(e);
    }

    // delete the job-specific _temporary folder and create a _done file in the output folder
    cleanupJob(context);

    // mark all the final output paths with a _SUCCESS file, if configured to do so (default = true)
    if (configuration.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, true)) {
        for (String relativePath : relativePaths) {
            Path pathToMark = new Path(finalOutput, relativePath);
            Path markerPath = new Path(pathToMark, SUCCEEDED_FILE_NAME);
            fs.createNewFile(markerPath);
        }
    }
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputFormat.java
License:Apache License
/**
 * Create a composite record writer that can write key/value data to different output files.
 *
 * @return a composite record writer
 * @throws IOException
 */
@Override
public RecordWriter<K, V> getRecordWriter(final TaskAttemptContext job) throws IOException {
    final String outputName = FileOutputFormat.getOutputName(job);

    Configuration configuration = job.getConfiguration();
    Class<? extends DynamicPartitioner> partitionerClass = configuration.getClass(
        PartitionedFileSetArguments.DYNAMIC_PARTITIONER_CLASS_NAME, null, DynamicPartitioner.class);

    @SuppressWarnings("unchecked")
    final DynamicPartitioner<K, V> dynamicPartitioner = new InstantiatorFactory(false)
        .get(TypeToken.of(partitionerClass)).create();

    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
    final BasicMapReduceTaskContext<K, V> taskContext = classLoader.getTaskContextProvider().get(job);

    String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
    PartitionedFileSet outputDataset = taskContext.getDataset(outputDatasetName);
    final Partitioning partitioning = outputDataset.getPartitioning();

    dynamicPartitioner.initialize(taskContext);

    return new RecordWriter<K, V>() {

        // a cache storing the record writers for different output files.
        Map<PartitionKey, RecordWriter<K, V>> recordWriters = new HashMap<>();

        public void write(K key, V value) throws IOException, InterruptedException {
            PartitionKey partitionKey = dynamicPartitioner.getPartitionKey(key, value);
            RecordWriter<K, V> rw = this.recordWriters.get(partitionKey);
            if (rw == null) {
                String relativePath = PartitionedFileSetDataset.getOutputPath(partitionKey, partitioning);
                String finalPath = relativePath + "/" + outputName;

                // if we don't have the record writer yet for the final path, create one and add it to the cache
                rw = getBaseRecordWriter(getTaskAttemptContext(job, finalPath));
                this.recordWriters.put(partitionKey, rw);
            }
            rw.write(key, value);
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            try {
                List<RecordWriter<?, ?>> recordWriters = new ArrayList<>();
                recordWriters.addAll(this.recordWriters.values());
                MultipleOutputs.closeRecordWriters(recordWriters, context);

                taskContext.flushOperations();
            } catch (Exception e) {
                throw new IOException(e);
            } finally {
                dynamicPartitioner.destroy();
            }
        }
    };
}