Usage examples for org.apache.hadoop.conf.Configuration#getBoolean
public boolean getBoolean(String name, boolean defaultValue)
Returns the value of the name property as a boolean. If the property is not set, or its value cannot be interpreted as a boolean, defaultValue is returned.
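Before the source-file examples, here is a minimal, self-contained sketch of the basic behaviour. The property name "example.feature.enabled" is purely illustrative and is not one of the keys used in the examples below.

    import org.apache.hadoop.conf.Configuration;

    public class GetBooleanExample {
        public static void main(String[] args) {
            Configuration conf = new Configuration();

            // Property not set yet: the supplied default (false) is returned.
            boolean before = conf.getBoolean("example.feature.enabled", false);

            // After setting the property, getBoolean returns the stored value.
            conf.setBoolean("example.feature.enabled", true);
            boolean after = conf.getBoolean("example.feature.enabled", false);

            System.out.println(before + " " + after); // prints: false true
        }
    }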
From source file:org.apache.jena.tdbloader4.FirstDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }
    Configuration configuration = getConf();
    boolean useCompression = configuration.getBoolean(Constants.OPTION_USE_COMPRESSION,
            Constants.OPTION_USE_COMPRESSION_DEFAULT);
    if (useCompression) {
        configuration.setBoolean("mapred.compress.map.output", true);
        configuration.set("mapred.output.compression.type", "BLOCK");
        configuration.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    }
    Job job = new Job(configuration);
    job.setJobName(Constants.NAME_FIRST);
    job.setJarByClass(getClass());
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setInputFormatClass(NQuadsInputFormat.class);
    job.setMapperClass(FirstMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setReducerClass(FirstReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    Utils.setReducers(job, configuration, log);
    job.setOutputFormatClass(TextOutputFormat.class);
    if (log.isDebugEnabled())
        Utils.log(job, log);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:org.apache.jena.tdbloader4.InferDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.printf("Usage: %s [generic options] <vocabulary> <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }
    Configuration configuration = getConf();
    boolean useCompression = configuration.getBoolean(Constants.OPTION_USE_COMPRESSION,
            Constants.OPTION_USE_COMPRESSION_DEFAULT);
    if (useCompression) {
        configuration.setBoolean("mapred.compress.map.output", true);
        configuration.set("mapred.output.compression.type", "BLOCK");
        configuration.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    }
    boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERRIDE_OUTPUT,
            Constants.OPTION_OVERRIDE_OUTPUT_DEFAULT);
    FileSystem fs = FileSystem.get(new Path(args[2]).toUri(), configuration);
    if (overrideOutput) {
        fs.delete(new Path(args[2]), true);
    }
    // All the mappers need to have the vocabulary/ontology available, typically they are very small
    Path vocabulary = new Path(args[0]);
    DistributedCache.addCacheFile(vocabulary.toUri(), configuration);
    Job job = new Job(configuration);
    job.setJobName(Constants.NAME_INFER);
    job.setJarByClass(getClass());
    FileInputFormat.addInputPath(job, new Path(args[1]));
    FileOutputFormat.setOutputPath(job, new Path(args[2]));
    job.setInputFormatClass(NQuadsInputFormat.class);
    job.setMapperClass(InferMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(0); // map only job
    job.setOutputFormatClass(TextOutputFormat.class);
    if (log.isDebugEnabled())
        Utils.log(job, log);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:org.apache.jena.tdbloader4.partitioners.TotalOrderPartitioner.java
License:Apache License
@SuppressWarnings("unchecked")
private void init(String indexName, Configuration conf) {
    log.debug("init({}, {})", indexName, conf);
    try {
        String parts = getPartitionFile(conf);
        final Path partFile = new Path(parts + "_" + indexName);
        final FileSystem fs = (DEFAULT_PATH.equals(parts))
                ? FileSystem.getLocal(conf) // assume in DistributedCache
                : partFile.getFileSystem(conf);
        log.debug("FileSystem is {}", fs);
        Job job = new Job(conf);
        Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
        log.debug("Map output key class is {}", keyClass.getSimpleName());
        K[] splitPoints = readPartitions(fs, partFile, keyClass, conf);
        numReduceTasks = job.getNumReduceTasks();
        log.debug("Found {} split points, number of reducers is {}", splitPoints.length, numReduceTasks);
        if (splitPoints.length != (numReduceTasks / 9) - 1) {
            log.debug("Split points are {} which is different from {}", splitPoints.length,
                    (numReduceTasks / 9) - 1);
            throw new IOException("Wrong number of partitions in keyset");
        }
        RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
        for (int i = 0; i < splitPoints.length - 1; ++i) {
            if (comparator.compare(splitPoints[i], splitPoints[i + 1]) >= 0) {
                log.debug("Split points are out of order");
                throw new IOException("Split points are out of order");
            }
        }
        boolean natOrder = conf.getBoolean(NATURAL_ORDER, true);
        Node<?> partitions = null;
        if (natOrder && BinaryComparable.class.isAssignableFrom(keyClass)) {
            partitions = buildTrie((BinaryComparable[]) splitPoints, 0, splitPoints.length, new byte[0],
                    // Now that blocks of identical splitless trie nodes are
                    // represented reentrantly, and we develop a leaf for any trie
                    // node with only one split point, the only reason for a depth
                    // limit is to refute stack overflow or bloat in the pathological
                    // case where the split points are long and mostly look like bytes
                    // iii...iixii...iii . Therefore, we make the default
                    // depth limit large but not huge.
                    conf.getInt(MAX_TRIE_DEPTH, 200));
        } else {
            partitions = new BinarySearchNode(splitPoints, comparator);
        }
        log.debug("Adding {} to {}", partitions, this.partitions);
        this.partitions.put(indexName, partitions);
    } catch (IOException e) {
        throw new IllegalArgumentException("Can't read partitions file", e);
    }
    log.debug("init({}, {}) finished.", indexName, conf);
}
From source file:org.apache.jena.tdbloader4.SecondDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }
    Configuration configuration = getConf();
    boolean useCompression = configuration.getBoolean(Constants.OPTION_USE_COMPRESSION,
            Constants.OPTION_USE_COMPRESSION_DEFAULT);
    if (useCompression) {
        configuration.setBoolean("mapred.compress.map.output", true);
        configuration.set("mapred.output.compression.type", "BLOCK");
        configuration.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    }
    Job job = new Job(configuration);
    job.setJobName(Constants.NAME_SECOND);
    job.setJarByClass(getClass());
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setInputFormatClass(NQuadsInputFormat.class);
    job.setMapperClass(SecondMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(SecondReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    Utils.setReducers(job, configuration, log);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    if (useCompression) {
        SequenceFileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    }
    if (log.isDebugEnabled())
        Utils.log(job, log);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:org.apache.jena.tdbloader4.StatsDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }
    Configuration configuration = getConf();
    boolean useCompression = configuration.getBoolean(Constants.OPTION_USE_COMPRESSION,
            Constants.OPTION_USE_COMPRESSION_DEFAULT);
    if (useCompression) {
        configuration.setBoolean("mapred.compress.map.output", true);
        configuration.set("mapred.output.compression.type", "BLOCK");
        configuration.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    }
    boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERRIDE_OUTPUT,
            Constants.OPTION_OVERRIDE_OUTPUT_DEFAULT);
    FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration);
    if (overrideOutput) {
        fs.delete(new Path(args[1]), true);
    }
    Job job = new Job(configuration);
    job.setJobName(Constants.NAME_STATS);
    job.setJarByClass(getClass());
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setInputFormatClass(NQuadsInputFormat.class);
    job.setMapperClass(StatsMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setCombinerClass(StatsReducer.class);
    job.setReducerClass(StatsReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // we use the combiner, 1 reducer here is not a problem
    configuration.set(Constants.OPTION_NUM_REDUCERS, "1");
    Utils.setReducers(job, configuration, log);
    job.setOutputFormatClass(TextOutputFormat.class);
    if (log.isDebugEnabled())
        Utils.log(job, log);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:org.apache.jena.tdbloader4.ThirdDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }
    log.debug("input: {}, output: {}", args[0], args[1]);
    Configuration configuration = getConf();
    boolean useCompression = configuration.getBoolean(Constants.OPTION_USE_COMPRESSION,
            Constants.OPTION_USE_COMPRESSION_DEFAULT);
    log.debug("Compression is {}", useCompression ? "enabled" : "disabled");
    if (useCompression) {
        configuration.setBoolean("mapred.compress.map.output", true);
        configuration.set("mapred.output.compression.type", "BLOCK");
        configuration.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    }
    Job job = new Job(configuration);
    job.setJobName(Constants.NAME_THIRD);
    job.setJarByClass(getClass());
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileInputFormat.setInputPathFilter(job, ExcludeNodeTableFilter.class);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(ThirdMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(ThirdReducer.class);
    job.setOutputKeyClass(LongQuadWritable.class);
    job.setOutputValueClass(NullWritable.class);
    Utils.setReducers(job, configuration, log);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    if (useCompression) {
        SequenceFileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    }
    if (log.isDebugEnabled())
        Utils.log(job, log);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:org.apache.jena.tdbloader4.Utils.java
License:Apache License
public static void setReducers(Job job, Configuration configuration, Logger log) {
    boolean runLocal = configuration.getBoolean(Constants.OPTION_RUN_LOCAL, Constants.OPTION_RUN_LOCAL_DEFAULT);
    int num_reducers = configuration.getInt(Constants.OPTION_NUM_REDUCERS, Constants.OPTION_NUM_REDUCERS_DEFAULT);
    // TODO: should we comment this out and let Hadoop decide the number of reducers?
    if (runLocal) {
        if (log != null)
            log.debug("Setting number of reducers to {}", 1);
        job.setNumReduceTasks(1);
    } else {
        if (Constants.NAME_FOURTH.equals(job.getJobName())) {
            job.setPartitionerClass(TotalOrderPartitioner.class);
            num_reducers = 9 * num_reducers;
        }
        job.setNumReduceTasks(num_reducers);
        if (log != null)
            log.debug("Setting number of reducers to {}", num_reducers);
    }
}
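The helper above pairs getBoolean with getInt to switch between a single local reducer and a cluster-sized reducer count. A standalone sketch of the same pattern follows; the property names are hypothetical, not the tdbloader4 Constants keys.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.Job;

    public final class ReducerConfig {
        // Hypothetical property names, chosen for illustration only.
        private static final String RUN_LOCAL = "example.run.local";
        private static final String NUM_REDUCERS = "example.num.reducers";

        // Read the flags from the job configuration and size the reduce phase accordingly.
        public static void configure(Job job) {
            Configuration conf = job.getConfiguration();
            boolean runLocal = conf.getBoolean(RUN_LOCAL, false);
            int numReducers = conf.getInt(NUM_REDUCERS, 4);
            job.setNumReduceTasks(runLocal ? 1 : numReducers);
        }
    }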
From source file:org.apache.kudu.mapreduce.tools.ImportCsvMapper.java
License:Apache License
/**
 * Handles initializing this class with objects specific to it (i.e., the parser).
 */
@Override
protected void setup(Context context) {
    Configuration conf = context.getConfiguration();

    this.separator = conf.get(ImportCsv.SEPARATOR_CONF_KEY);
    if (this.separator == null) {
        this.separator = ImportCsv.DEFAULT_SEPARATOR;
    }

    this.skipBadLines = conf.getBoolean(ImportCsv.SKIP_LINES_CONF_KEY, true);
    this.badLineCount = context.getCounter(ImportCsv.Counters.BAD_LINES);

    this.parser = new CsvParser(conf.get(ImportCsv.COLUMNS_NAMES_KEY), this.separator);

    this.table = KuduTableMapReduceUtil.getTableFromContext(context);
    this.schema = this.table.getSchema();
}
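Reading a boolean once in setup() and caching it in a field, as the mapper above does with skipBadLines, avoids a configuration lookup per record. A minimal sketch of that pattern follows; the mapper class and the "example.skip.bad.lines" key are illustrative and not part of the Kudu import tool.

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    public class LenientLineMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
        private boolean skipBadLines;

        @Override
        protected void setup(Context context) {
            Configuration conf = context.getConfiguration();
            // Cache the flag once per task; "example.skip.bad.lines" is an illustrative key.
            skipBadLines = conf.getBoolean("example.skip.bad.lines", true);
        }

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            if (line.isEmpty()) {
                if (skipBadLines) {
                    return; // silently drop the bad record
                }
                throw new IOException("Empty line at offset " + key.get());
            }
            context.write(new Text(line), NullWritable.get());
        }
    }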
From source file:org.apache.kylin.storage.hbase.steps.HFileOutputFormat3.java
License:Apache License
static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createRecordWriter(
        final TaskAttemptContext context, final OutputCommitter committer)
        throws IOException, InterruptedException {
    // Get the path of the temporary output file
    final Path outputdir = ((FileOutputCommitter) committer).getWorkPath();
    final Configuration conf = context.getConfiguration();
    LOG.debug("Task output path: " + outputdir);
    final FileSystem fs = outputdir.getFileSystem(conf);
    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config. Add to hbase-*.xml if other than default compression.
    final String defaultCompressionStr = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final Algorithm defaultCompression = AbstractHFileWriter.compressionByName(defaultCompressionStr);
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
            false);

    // create a map from column family to the compression algorithm
    final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf);
    final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
    final Map<byte[], DataBlockEncoding> datablockEncodingMap = createFamilyDataBlockEncodingMap(conf);
    final DataBlockEncoding overriddenEncoding;
    if (dataBlockEncodingStr != null) {
        overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
    } else {
        overriddenEncoding = null;
    }

    return new RecordWriter<ImmutableBytesWritable, V>() {
        // Map of families to writers and how much has been output on the writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                Bytes.BYTES_COMPARATOR);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        @Override
        public void write(ImmutableBytesWritable row, V cell) throws IOException {
            KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
            if (row == null && kv == null) {
                rollWriters();
                return;
            }
            byte[] rowKey = CellUtil.cloneRow(kv);
            long length = kv.getLength();
            byte[] family = CellUtil.cloneFamily(kv);
            WriterLength wl = this.writers.get(family);
            if (wl == null) {
                fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
            }
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }
            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family, conf);
            }
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "BX_UNBOXING_IMMEDIATELY_REBOXED",
                justification = "Not important")
        private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            Algorithm compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            BloomType bloomType = bloomTypeMap.get(family);
            bloomType = bloomType == null ? BloomType.NONE : bloomType;
            Integer blockSize = blockSizeMap.get(family);
            blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
            DataBlockEncoding encoding = overriddenEncoding;
            encoding = encoding == null ? datablockEncodingMap.get(family) : encoding;
            encoding = encoding == null ? DataBlockEncoding.NONE : encoding;
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            HFileContextBuilder contextBuilder = new HFileContextBuilder().withCompression(compression)
                    .withChecksumType(HStore.getChecksumType(conf))
                    .withBytesPerCheckSum(HStore.getBytesPerChecksum(conf)).withBlockSize(blockSize);
            contextBuilder.withDataBlockEncoding(encoding);
            HFileContext hFileContext = contextBuilder.build();
            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
                    .withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR)
                    .withFileContext(hFileContext).build();
            this.writers.put(family, wl);
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        @Override
        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}
From source file:org.apache.lens.cube.parse.AggregateResolver.java
License:Apache License
@Override
public void rewriteContext(CubeQueryContext cubeql) throws LensException {
    if (cubeql.getCube() == null) {
        return;
    }
    boolean nonDefaultAggregates = false;
    boolean aggregateResolverDisabled = cubeql.getConf().getBoolean(
            CubeQueryConfUtil.DISABLE_AGGREGATE_RESOLVER, CubeQueryConfUtil.DEFAULT_DISABLE_AGGREGATE_RESOLVER);
    // Check if the query contains measures
    // 1. not inside default aggregate expressions
    // 2. With no default aggregate defined
    // 3. there are distinct selection of measures
    // If yes, only the raw (non aggregated) fact can answer this query.
    // In that case remove aggregate facts from the candidate fact list
    if (hasMeasuresInDistinctClause(cubeql, cubeql.getSelectAST(), false)
            || hasMeasuresInDistinctClause(cubeql, cubeql.getHavingAST(), false)
            || hasMeasuresNotInDefaultAggregates(cubeql, cubeql.getSelectAST(), null, aggregateResolverDisabled)
            || hasMeasuresNotInDefaultAggregates(cubeql, cubeql.getHavingAST(), null, aggregateResolverDisabled)
            || hasMeasures(cubeql, cubeql.getWhereAST()) || hasMeasures(cubeql, cubeql.getGroupByAST())
            || hasMeasures(cubeql, cubeql.getOrderByAST())) {
        Iterator<CandidateFact> factItr = cubeql.getCandidateFacts().iterator();
        while (factItr.hasNext()) {
            CandidateFact candidate = factItr.next();
            if (candidate.fact.isAggregated()) {
                cubeql.addFactPruningMsgs(candidate.fact, CandidateTablePruneCause.missingDefaultAggregate());
                factItr.remove();
            }
        }
        nonDefaultAggregates = true;
        log.info("Query has non default aggregates, no aggregate resolution will be done");
    }
    cubeql.pruneCandidateFactSet(CandidateTablePruneCode.MISSING_DEFAULT_AGGREGATE);
    if (nonDefaultAggregates || aggregateResolverDisabled) {
        return;
    }
    resolveClause(cubeql, cubeql.getSelectAST());
    resolveClause(cubeql, cubeql.getHavingAST());
    Configuration distConf = cubeql.getConf();
    boolean isDimOnlyDistinctEnabled = distConf.getBoolean(CubeQueryConfUtil.ENABLE_ATTRFIELDS_ADD_DISTINCT,
            CubeQueryConfUtil.DEFAULT_ATTR_FIELDS_ADD_DISTINCT);
    // Having clause will always work with measures, if only keys projected
    // query should skip distinct and promote group by.
    if (cubeql.getHavingAST() == null && isDimOnlyDistinctEnabled) {
        // Check if any measure/aggregate columns and distinct clause used in
        // select tree. If not, update selectAST token "SELECT" to "SELECT DISTINCT"
        if (!hasMeasures(cubeql, cubeql.getSelectAST()) && !isDistinctClauseUsed(cubeql.getSelectAST())
                && !HQLParser.hasAggregate(cubeql.getSelectAST())
                && !isAggregateDimExprUsedInSelect(cubeql, cubeql.getSelectAST())) {
            cubeql.getSelectAST().getToken().setType(HiveParser.TOK_SELECTDI);
        }
    }
}