List of usage examples for org.apache.hadoop.conf.Configuration#setBoolean
public void setBoolean(String name, boolean value)
Sets the value of the name property to a boolean.
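As a quick orientation before the project examples below, here is a minimal, self-contained sketch of the call; the property name my.feature.enabled is a made-up placeholder, not an official Hadoop key.

    import org.apache.hadoop.conf.Configuration;

    public class SetBooleanExample {
        public static void main(String[] args) {
            Configuration conf = new Configuration();

            // Store a boolean flag under a (hypothetical) property name.
            conf.setBoolean("my.feature.enabled", true);

            // Read it back; the second argument is the default returned
            // when the property is not set.
            boolean enabled = conf.getBoolean("my.feature.enabled", false);
            System.out.println("my.feature.enabled = " + enabled);
        }
    }

The value is stored as the string "true" or "false" in the configuration, so it can also be read back with get(String) if needed.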
From source file:com.splicemachine.mrio.api.core.SMInputFormat.java
License:Apache License
    @Override
    public void setConf(Configuration conf) {
        if (LOG.isTraceEnabled())
            SpliceLogUtils.trace(LOG, "setConf conf=%s", conf);
        this.conf = conf;
        String tableName = conf.get(MRConstants.SPLICE_INPUT_TABLE_NAME);
        String conglomerate = conf.get(MRConstants.SPLICE_INPUT_CONGLOMERATE);
        String tableScannerAsString = conf.get(MRConstants.SPLICE_SCAN_INFO);
        spark = tableScannerAsString != null;
        conf.setBoolean("splice.spark", spark);
        String jdbcString = conf.get(MRConstants.SPLICE_JDBC_STR);
        String rootDir = conf.get(HConstants.HBASE_DIR);
        if (util == null && jdbcString != null)
            util = SMSQLUtil.getInstance(jdbcString);
        if (LOG.isTraceEnabled())
            SpliceLogUtils.trace(LOG,
                    "setConf tableName=%s, conglomerate=%s, tableScannerAsString=%s" + "jdbcString=%s, rootDir=%s",
                    tableName, conglomerate, tableScannerAsString, jdbcString, rootDir);
        if (conglomerate == null && !spark) {
            LOG.error("Conglomerate not provided when spark is activated");
            throw new RuntimeException("Conglomerate not provided when spark is activated");
        }
        if (tableName == null && conglomerate == null) {
            LOG.error("Table Name Supplied is null");
            throw new RuntimeException("Table Name Supplied is Null");
        }
        if (conglomerate == null) {
            if (jdbcString == null) {
                LOG.error("JDBC String Not Supplied");
                throw new RuntimeException("JDBC String Not Supplied");
            }
            try {
                conglomerate = util.getConglomID(tableName);
                conf.set(MRConstants.SPLICE_INPUT_CONGLOMERATE, conglomerate);
            } catch (SQLException e) {
                LOG.error(StringUtils.stringifyException(e));
                throw new RuntimeException(e);
            }
        }
        try {
            if (SIDriver.driver() == null)
                SpliceSpark.setupSpliceStaticComponents();
            PartitionFactory tableFactory = SIDriver.driver().getTableFactory();
            setHTable(((ClientPartition) tableFactory.getTable(conglomerate)).unwrapDelegate());
        } catch (Exception e) {
            LOG.error(StringUtils.stringifyException(e));
        }
        if (tableScannerAsString == null) {
            if (jdbcString == null) {
                LOG.error("JDBC String Not Supplied");
                throw new RuntimeException("JDBC String Not Supplied");
            }
            try {
                conf.set(MRConstants.SPLICE_SCAN_INFO,
                        util.getTableScannerBuilder(tableName, null).base64Encode());
            } catch (Exception e) {
                LOG.error(StringUtils.stringifyException(e));
                throw new RuntimeException(e);
            }
        }
        if (LOG.isTraceEnabled())
            SpliceLogUtils.trace(LOG, "finishingSetConf");
    }
From source file:com.splicemachine.orc.OrcConf.java
License:Open Source License
    public static void setBoolVar(Configuration conf, OrcConf.ConfVars var, boolean val) {
        conf.setBoolean(var.varname, val);
    }
From source file:com.splicemachine.test.SpliceTestPlatformConfig.java
License:Apache License
    public static Configuration create(String hbaseRootDirUri, Integer masterPort, Integer masterInfoPort,
            Integer regionServerPort, Integer regionServerInfoPort, Integer derbyPort, boolean failTasksRandomly) {

        Configuration config = HConfiguration.unwrapDelegate();

        config.set(SQLConfiguration.STORAGE_FACTORY_HOME, hbaseRootDirUri);

        //
        // Coprocessors
        //
        config.set("hbase.coprocessor.regionserver.classes", getRegionServerCoprocessorsAsString());
        config.set("hbase.coprocessor.region.classes", getRegionCoprocessorsAsString());
        config.set("hbase.coprocessor.master.classes", getMasterCoprocessorsAsString());

        //
        // Networking
        //
        config.set("hbase.zookeeper.quorum", "127.0.0.1:2181");
        config.setInt("hbase.master.port", masterPort);
        config.setInt("hbase.master.info.port", masterInfoPort);
        config.setInt("hbase.regionserver.port", regionServerPort);
        config.setInt("hbase.regionserver.info.port", regionServerInfoPort);
        // this is set because the HBase master and regionserver are running on the same machine and in the same JVM
        config.setInt("hbase.master.jmx.port", HConfiguration.DEFAULT_JMX_BIND_PORT);
        config.setInt(SQLConfiguration.NETWORK_BIND_PORT, derbyPort);
        config.setClass(DefaultStoreEngine.DEFAULT_COMPACTOR_CLASS_KEY, SpliceDefaultCompactor.class, Compactor.class);
        // config.setClass(ConsistencyControlUtils.MVCC_IMPL, SIMultiVersionConsistencyControl.class, ConsistencyControl.class);
        config.setClass(DefaultStoreEngine.DEFAULT_COMPACTION_POLICY_CLASS_KEY, SpliceDefaultCompactionPolicy.class, CompactionPolicy.class);

        //
        // Networking -- interfaces
        //
        // force use of loop back interface on MacOSX, else don't set it
        // if (System.getProperty("os.name").contains("Mac") ) {
        //     String interfaceName = "lo0";
        //     config.set("hbase.zookeeper.dns.interface", interfaceName);
        //     config.set("hbase.master.dns.interface", interfaceName);
        //     config.set("hbase.regionserver.dns.interface", interfaceName);
        // }

        //
        // File System
        //
        config.set("fs.defaultFS", "file:///"); // MapR Hack, tells it local filesystem
        // fs.default.name is deprecated
        config.set(FileSystem.FS_DEFAULT_NAME_KEY, "file:///");
        config.setDouble("yarn.nodemanager.resource.io-spindles", 2.0);
        config.set("fs.default.name", "file:///");
        config.set("yarn.nodemanager.container-executor.class",
                "org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor");

        // Must allow Cygwin instance to config its own rootURI
        if (!"CYGWIN".equals(hbaseRootDirUri)) {
            config.set("hbase.rootdir", hbaseRootDirUri);
        }

        //
        // Threads, timeouts
        //
        config.setLong("hbase.rpc.timeout", MINUTES.toMillis(2));
        config.setLong("hbase.client.scanner.timeout.period", MINUTES.toMillis(2)); // hbase.regionserver.lease.period is deprecated
        config.setLong("hbase.client.operation.timeout", MINUTES.toMillis(2));
        config.setLong("hbase.regionserver.handler.count", 200);
        config.setLong("hbase.regionserver.msginterval", 1000);
        config.setLong("hbase.master.event.waiting.time", 20);
        config.setLong("hbase.master.lease.thread.wakefrequency", SECONDS.toMillis(3));
        // config.setBoolean("hbase.master.loadbalance.bytable", true);
        config.setInt("hbase.balancer.period", 5000);
        config.setLong("hbase.server.thread.wakefrequency", SECONDS.toMillis(1));
        config.setLong("hbase.client.pause", 100);

        //
        // Compaction Controls
        //
        config.setLong("hbase.hstore.compaction.min", 5); // min number of eligible files before we compact
        config.setLong("hbase.hstore.compaction.max", 10); // max files to be selected for a single minor compaction
        config.setLong("hbase.hstore.compaction.min.size", 16 * MiB); // store files smaller than this will always be eligible for minor compaction. HFiles this size or larger are evaluated by hbase.hstore.compaction.ratio to determine if they are eligible
        config.setLong("hbase.hstore.compaction.max.size", 248 * MiB); // store files larger than this will be excluded from compaction
        config.setFloat("hbase.hstore.compaction.ratio", 1.25f); // default is 1.2f, at one point we had this set to 0.25f and 25f (which was likely a typo)

        //
        // Memstore, store files, splits
        //
        config.setLong(HConstants.HREGION_MAX_FILESIZE, 32 * MiB); // hbase.hregion.max.filesize
        config.setLong("hbase.hregion.memstore.flush.size", 128 * MiB); // was 512 MiB
        config.setLong("hbase.hregion.memstore.block.multiplier", 4);
        config.setFloat("hbase.regionserver.global.memstore.size", 0.25f); // set mem store to 25% of heap
        config.setLong("hbase.hstore.blockingStoreFiles", 20);
        // config.set("hbase.regionserver.region.split.policy", "org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy"); // change default split policy. this makes more sense for a standalone/single regionserver

        // Support SI
        // config.setClass(HConstants.MVCC_IMPL, SIMultiVersionConsistencyControl.class, ConsistencyControl.class);

        //
        // HFile
        //
        config.setInt("hfile.index.block.max.size", 16 * 1024); // 16KiB
        config.setFloat("hfile.block.cache.size", 0.25f); // set block cache to 25% of heap
        config.setFloat("io.hfile.bloom.error.rate", (float) 0.005);
        config.setBoolean(CacheConfig.CACHE_BLOOM_BLOCKS_ON_WRITE_KEY, true); // hfile.block.bloom.cacheonwrite
        // config.set("hbase.master.hfilecleaner.plugins", getHFileCleanerAsString());
        config.set("hbase.master.hfilecleaner.plugins", getHFileCleanerAsString());

        //
        // Misc
        //
        config.set("hbase.cluster.distributed", "true"); // don't start zookeeper for us
        config.set("hbase.master.distributed.log.splitting", "false"); // TODO: explain why we are setting this

        // AWS Credentials for test...
        // config.set(ACCESS_KEY, "AKIAJ6HBMCK5ALHVBFPQ");
        config.set(SECRET_KEY, "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");

        //
        // Splice
        //
        config.setLong("splice.ddl.drainingWait.maximum", SECONDS.toMillis(15)); // wait 15 seconds before bailing on bad ddl statements
        config.setLong("splice.ddl.maxWaitSeconds", 120000);

        //
        // Snapshots
        //
        config.setBoolean("hbase.snapshot.enabled", true);

        HConfiguration.reloadConfiguration(config);
        return HConfiguration.unwrapDelegate();
    }
From source file:com.spotify.hdfs2cass.BulkLoader.java
License:Apache License
    public int run(String[] args) throws Exception {
        CommandLine cmdLine = parseOptions(args);

        String[] inputPaths = cmdLine.getOptionValues('i');
        String seedNodeHost = cmdLine.getOptionValue('h');
        String seedNodePort = cmdLine.getOptionValue('p', "9160");
        String keyspace = cmdLine.getOptionValue('k');
        String colfamily = cmdLine.getOptionValue('c');
        int mappers = Integer.parseInt(cmdLine.getOptionValue('m', "0"));
        Integer copiers = Integer.parseInt(cmdLine.getOptionValue('P', "0"));
        String poolName = cmdLine.getOptionValue("pool");

        ClusterInfo clusterInfo = new ClusterInfo(seedNodeHost, seedNodePort);
        clusterInfo.init(keyspace);

        final String partitionerClass = clusterInfo.getPartitionerClass();
        final int reducers = adjustReducers(Integer.parseInt(cmdLine.getOptionValue('r', "0")),
                clusterInfo.getNumClusterNodes());

        Configuration conf = new Configuration();
        ConfigHelper.setOutputColumnFamily(conf, keyspace, colfamily);
        ConfigHelper.setOutputInitialAddress(conf, seedNodeHost);
        ConfigHelper.setOutputRpcPort(conf, seedNodePort);
        ConfigHelper.setOutputPartitioner(conf, partitionerClass);

        if (cmdLine.hasOption('s')) {
            conf.set("mapreduce.output.bulkoutputformat.buffersize", cmdLine.getOptionValue('s', "32"));
        }

        if (cmdLine.hasOption('M')) {
            conf.set("mapreduce.output.bulkoutputformat.streamthrottlembits", cmdLine.getOptionValue('M'));
        }

        if (cmdLine.hasOption('C')) {
            ConfigHelper.setOutputCompressionClass(conf, cmdLine.getOptionValue('C'));
        }

        if (cmdLine.hasOption('b')) {
            conf.setBoolean("com.spotify.hdfs2cass.base64", true);
        }

        JobConf job = new JobConf(conf);

        if (mappers > 0)
            job.setNumMapTasks(mappers);
        if (reducers > 0)
            job.setNumReduceTasks(reducers);
        if (copiers > 0)
            job.set("mapred.reduce.parallel.copies", copiers.toString());

        if (poolName != null)
            job.set("mapred.fairscheduler.pool", poolName);

        // set the nodes as a param for the other hadoop nodes
        clusterInfo.setConf(job);

        String jobName = "bulkloader-hdfs-to-cassandra";
        if (cmdLine.hasOption('n'))
            jobName += "-" + cmdLine.getOptionValue('n');
        job.setJobName(jobName);
        job.setJarByClass(BulkLoader.class);

        job.setInputFormat(AvroAsTextInputFormat.class);

        for (String inputPath : inputPaths) {
            FileInputFormat.addInputPath(job, new Path(inputPath));
        }

        // map just outputs text, reduce sends to cassandra
        job.setMapperClass(MapToText.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setPartitionerClass(CassandraPartitioner.class);
        job.setReducerClass(ReduceTextToCassandra.class);
        job.setOutputKeyClass(ByteBuffer.class);
        job.setOutputValueClass(List.class);

        if (cmdLine.hasOption('s'))
            job.setOutputFormat(BulkOutputFormat.class);
        else
            job.setOutputFormat(ColumnFamilyOutputFormat.class);

        JobClient.runJob(job);
        return 0;
    }
From source file:com.spotify.styx.util.Connections.java
License:Apache License
    public static Connection createBigTableConnection(Config config) {
        final String projectId = config.getString(BIGTABLE_PROJECT_ID);
        final String instanceId = config.getString(BIGTABLE_INSTANCE_ID);

        LOG.info("Creating Bigtable connection for project:{}, instance:{}", projectId, instanceId);

        final Configuration bigtableConfiguration = new Configuration();
        bigtableConfiguration.set("google.bigtable.project.id", projectId);
        bigtableConfiguration.set("google.bigtable.instance.id", instanceId);
        bigtableConfiguration.setBoolean("google.bigtable.rpc.use.timeouts", true);

        return BigtableConfiguration.connect(bigtableConfiguration);
    }
From source file:com.streamsets.datacollector.stage.HadoopConfigurationUtils.java
License:Apache License
    public static void configureHadoopTreatSubjectExternal(Configuration conf) {
        // Not using constant to make this code compile even for stage libraries that do
        // not have HADOOP-13805 available.
        conf.setBoolean(HADOOP_SUBJECT_TREAT_EXTERNAL, true);
    }
From source file:com.streamsets.pipeline.emr.EmrBinding.java
License:Apache License
    @Override
    public void init() throws Exception {
        Configuration conf = new Configuration();
        LOG.info("Arg 0: {}, Arg 1: {}, Arg 2: {}, Arg 3: {}, Arg 4: {}", args[0], args[1], args[2], args[3], args[4]);
        try (InputStream in = getClass().getClassLoader().getResourceAsStream("cluster_sdc.properties")) {
            properties = new Properties();
            properties.load(in);
            String dataFormat = Utils.getHdfsDataFormat(properties);
            for (Object key : properties.keySet()) {
                String realKey = String.valueOf(key);
                // TODO - Override other configs set in HdfsSource
                if (overriddenConfs.contains(realKey)) {
                    String value = Utils.getPropertyNotNull(properties, realKey);
                    conf.set(realKey, value);
                }
            }
            String javaOpts = args[3];
            Integer mapMemoryMb = HadoopMapReduceBinding.getMapMemoryMb(javaOpts, conf);
            if (mapMemoryMb != null) {
                conf.set(HadoopMapReduceBinding.MAPREDUCE_MAP_MEMORY_MB, String.valueOf(mapMemoryMb));
            }
            conf.set(HadoopMapReduceBinding.MAPREDUCE_JAVA_OPTS, javaOpts);

            conf.setBoolean("mapreduce.map.speculative", false);
            conf.setBoolean("mapreduce.reduce.speculative", false);
            if ("AVRO".equalsIgnoreCase(dataFormat)) {
                conf.set(Job.INPUT_FORMAT_CLASS_ATTR, "org.apache.avro.mapreduce.AvroKeyInputFormat");
                conf.set(Job.MAP_OUTPUT_KEY_CLASS, "org.apache.avro.mapred.AvroKey");
            }
            conf.set(MRJobConfig.MAP_LOG_LEVEL, args[4]);
            job = Job.getInstance(conf, "StreamSets Data Collector: "
                    + properties.getProperty(ClusterModeConstants.CLUSTER_PIPELINE_TITLE) + "::" + args[2]);
            for (String archive : Arrays.asList(args[0].split("\\s*,\\s*"))) {
                job.addCacheArchive(new URI(archive));
            }
            for (String libJar : Arrays.asList(args[1].split("\\s*,\\s*"))) {
                job.addFileToClassPath(new Path(libJar));
            }
            job.setJarByClass(this.getClass());
            job.setNumReduceTasks(0);
            if (!"AVRO".equalsIgnoreCase(dataFormat)) {
                job.setOutputKeyClass(NullWritable.class);
            }
            job.setMapperClass(PipelineMapper.class);
            job.setOutputValueClass(NullWritable.class);
            job.setOutputFormatClass(NullOutputFormat.class);
        }
    }
From source file:com.streamsets.pipeline.hadoop.HadoopMapReduceBinding.java
License:Apache License
    @Override
    public void init() throws Exception {
        Configuration conf = new Configuration();
        GenericOptionsParser parser = new GenericOptionsParser(conf, args);
        String[] remainingArgs = parser.getRemainingArgs();
        properties = new Properties();
        if (remainingArgs.length != 2) {
            List<String> argsList = new ArrayList<>();
            for (String arg : remainingArgs) {
                argsList.add("'" + arg + "'");
            }
            throw new IllegalArgumentException("Error expected properties-file java-opts got: " + argsList);
        }
        String propertiesFile = remainingArgs[0];
        String javaOpts = remainingArgs[1];
        try (InputStream in = new FileInputStream(propertiesFile)) {
            properties.load(in);
            String dataFormat = getProperty("dataFormat");
            String source = this.getClass().getSimpleName();
            for (Object key : properties.keySet()) {
                String realKey = String.valueOf(key);
                String value = getProperty(realKey);
                conf.set(realKey, value, source);
            }
            conf.set("mapred.child.java.opts", javaOpts);
            conf.setBoolean("mapreduce.map.speculative", false);
            conf.setBoolean("mapreduce.reduce.speculative", false);
            if (dataFormat.equalsIgnoreCase("AVRO")) {
                conf.set(Job.INPUT_FORMAT_CLASS_ATTR, "org.apache.avro.mapreduce.AvroKeyInputFormat");
                conf.set(Job.MAP_OUTPUT_KEY_CLASS, "org.apache.avro.mapred.AvroKey");
            }
            job = Job.getInstance(conf, "StreamSets Data Collector - Batch Execution Mode");
            job.setJarByClass(this.getClass());
            job.setNumReduceTasks(0);
            if (!dataFormat.equalsIgnoreCase("AVRO")) {
                job.setOutputKeyClass(NullWritable.class);
            }
            job.setMapperClass(PipelineMapper.class);
            job.setOutputValueClass(NullWritable.class);
            job.setOutputFormatClass(NullOutputFormat.class);
        }
    }
From source file:com.streamsets.pipeline.lib.hdfs.common.HdfsBaseConfigBean.java
License:Apache License
    protected Configuration getHadoopConfiguration(Stage.Context context, List<Stage.ConfigIssue> issues) {
        Configuration conf = new Configuration();
        conf.setClass("fs.file.impl", RawLocalFileSystem.class, FileSystem.class);

        // We handle the file system close ourselves in destroy.
        // If enabled, this would also cause issues (not allowing us to rename the files on the destroy call)
        // when we run a shutdown hook on app kill.
        // See https://issues.streamsets.com/browse/SDC-4057
        conf.setBoolean("fs.automatic.close", false);

        // See SDC-5451, we set hadoop.treat.subject.external automatically to take advantage of HADOOP-13805
        HadoopConfigurationUtils.configureHadoopTreatSubjectExternal(conf);

        if (hdfsKerberos) {
            conf.set(CommonConfigurationKeys.HADOOP_SECURITY_AUTHENTICATION,
                    UserGroupInformation.AuthenticationMethod.KERBEROS.name());
            try {
                conf.set(DFSConfigKeys.DFS_NAMENODE_USER_NAME_KEY,
                        "hdfs/_HOST@" + HadoopSecurityUtil.getDefaultRealm());
            } catch (Exception ex) {
                if (!hdfsConfigs.stream().anyMatch(i -> DFSConfigKeys.DFS_NAMENODE_USER_NAME_KEY.equals(i.key))) {
                    issues.add(context.createConfigIssue(Groups.HADOOP_FS.name(), null, Errors.HADOOPFS_28,
                            ex.toString()));
                }
            }
        }

        if (hdfsConfDir != null && !hdfsConfDir.isEmpty()) {
            File hadoopConfigDir = new File(hdfsConfDir);
            if ((context.getExecutionMode() == ExecutionMode.CLUSTER_BATCH
                    || context.getExecutionMode() == ExecutionMode.CLUSTER_YARN_STREAMING
                    || context.getExecutionMode() == ExecutionMode.CLUSTER_MESOS_STREAMING)
                    && hadoopConfigDir.isAbsolute()) {
                // Do not allow absolute hadoop config directory in cluster mode
                issues.add(context.createConfigIssue(Groups.HADOOP_FS.name(), getConfigBeanPrefix() + "hdfsConfDir",
                        Errors.HADOOPFS_45, hdfsConfDir));
            } else {
                if (!hadoopConfigDir.isAbsolute()) {
                    hadoopConfigDir = new File(context.getResourcesDirectory(), hdfsConfDir).getAbsoluteFile();
                }
                if (!hadoopConfigDir.exists()) {
                    issues.add(context.createConfigIssue(Groups.HADOOP_FS.name(), getConfigBeanPrefix() + "hdfsConfDir",
                            Errors.HADOOPFS_25, hadoopConfigDir.getPath()));
                } else if (!hadoopConfigDir.isDirectory()) {
                    issues.add(context.createConfigIssue(Groups.HADOOP_FS.name(), getConfigBeanPrefix() + "hdfsConfDir",
                            Errors.HADOOPFS_26, hadoopConfigDir.getPath()));
                } else {
                    File coreSite = new File(hadoopConfigDir, "core-site.xml");
                    if (coreSite.exists()) {
                        if (!coreSite.isFile()) {
                            issues.add(context.createConfigIssue(Groups.HADOOP_FS.name(),
                                    getConfigBeanPrefix() + "hdfsConfDir", Errors.HADOOPFS_27, coreSite.getPath()));
                        }
                        conf.addResource(new Path(coreSite.getAbsolutePath()));
                    }
                    File hdfsSite = new File(hadoopConfigDir, "hdfs-site.xml");
                    if (hdfsSite.exists()) {
                        if (!hdfsSite.isFile()) {
                            issues.add(context.createConfigIssue(Groups.HADOOP_FS.name(),
                                    getConfigBeanPrefix() + "hdfsConfDir", Errors.HADOOPFS_27, hdfsSite.getPath()));
                        }
                        conf.addResource(new Path(hdfsSite.getAbsolutePath()));
                    }
                }
            }
        } else {
            Optional<HadoopConfigBean> fsDefaultFS = hdfsConfigs.stream()
                    .filter(item -> CommonConfigurationKeys.FS_DEFAULT_NAME_KEY.equals(item.key)).findFirst();
            if (StringUtils.isEmpty(hdfsUri) && !fsDefaultFS.isPresent()) {
                // No URI, no config dir, and no fs.defaultFS config param.
                // Avoid defaulting to writing to file:/// (SDC-5143)
                issues.add(context.createConfigIssue(Groups.HADOOP_FS.name(), getConfigBeanPrefix() + "hdfsUri",
                        Errors.HADOOPFS_61));
            }
        }

        for (HadoopConfigBean configBean : hdfsConfigs) {
            try {
                conf.set(configBean.key, configBean.value.get());
            } catch (StageException e) {
                issues.add(context.createConfigIssue(Groups.HADOOP_FS.name(), getConfigBeanPrefix() + "hdfsConfigs",
                        Errors.HADOOPFS_62, e.toString()));
            }
        }

        return conf;
    }
From source file:com.streamsets.pipeline.stage.destination.mapreduce.MapReduceExecutor.java
License:Apache License
    @Override
    public void write(Batch batch) throws StageException {
        EvalContext eval = new EvalContext(getContext());

        Iterator<Record> it = batch.getRecords();
        while (it.hasNext()) {
            final Record record = it.next();
            eval.setRecord(record);

            Job job = null;
            try {
                // Job configuration object is a clone of the original one that we're keeping in mapReduceConfig class
                final Configuration jobConfiguration = new Configuration(mapReduceConfig.getConfiguration());

                // Evaluate all dynamic properties and store them in the configuration job
                for (Map.Entry<String, String> entry : jobConfig.jobConfigs.entrySet()) {
                    String key = eval.evaluateToString("jobConfigs", entry.getKey(), true);
                    String value = eval.evaluateToString("jobConfigs", entry.getValue(), false);
                    jobConfiguration.set(key, value);
                }

                // For built-in job creators, evaluate their properties and persist them in the MR config
                switch (jobConfig.jobType) {
                case AVRO_PARQUET:
                    jobConfiguration.set(AvroConversionCommonConstants.INPUT_FILE,
                            eval.evaluateToString("inputFile", jobConfig.avroConversionCommonConfig.inputFile, true));
                    jobConfiguration.set(AvroConversionCommonConstants.OUTPUT_DIR, eval.evaluateToString(
                            "outputDirectory", jobConfig.avroConversionCommonConfig.outputDirectory, true));
                    jobConfiguration.setBoolean(AvroConversionCommonConstants.KEEP_INPUT_FILE,
                            jobConfig.avroConversionCommonConfig.keepInputFile);
                    jobConfiguration.set(AvroParquetConstants.COMPRESSION_CODEC_NAME, eval.evaluateToString(
                            "compressionCodec", jobConfig.avroParquetConfig.compressionCodec, false));
                    jobConfiguration.setInt(AvroParquetConstants.ROW_GROUP_SIZE, jobConfig.avroParquetConfig.rowGroupSize);
                    jobConfiguration.setInt(AvroParquetConstants.PAGE_SIZE, jobConfig.avroParquetConfig.pageSize);
                    jobConfiguration.setInt(AvroParquetConstants.DICTIONARY_PAGE_SIZE,
                            jobConfig.avroParquetConfig.dictionaryPageSize);
                    jobConfiguration.setInt(AvroParquetConstants.MAX_PADDING_SIZE,
                            jobConfig.avroParquetConfig.maxPaddingSize);
                    jobConfiguration.setBoolean(AvroConversionCommonConstants.OVERWRITE_TMP_FILE,
                            jobConfig.avroConversionCommonConfig.overwriteTmpFile);
                    break;
                case AVRO_ORC:
                    jobConfiguration.set(AvroConversionCommonConstants.INPUT_FILE,
                            eval.evaluateToString("inputFile", jobConfig.avroConversionCommonConfig.inputFile, true));
                    jobConfiguration.set(AvroConversionCommonConstants.OUTPUT_DIR, eval.evaluateToString(
                            "outputDirectory", jobConfig.avroConversionCommonConfig.outputDirectory, true));
                    jobConfiguration.setBoolean(AvroConversionCommonConstants.KEEP_INPUT_FILE,
                            jobConfig.avroConversionCommonConfig.keepInputFile);
                    jobConfiguration.setBoolean(AvroConversionCommonConstants.OVERWRITE_TMP_FILE,
                            jobConfig.avroConversionCommonConfig.overwriteTmpFile);
                    jobConfiguration.setInt(AvroOrcConstants.ORC_BATCH_SIZE, jobConfig.avroOrcConfig.orcBatchSize);
                    break;
                case CUSTOM:
                    // Nothing to do because the custom type is a generic one that has no special config properties
                    break;
                default:
                    throw new UnsupportedOperationException("Unsupported JobType: " + jobConfig.jobType);
                }

                job = createAndSubmitJob(jobConfiguration);
            } catch (IOException | InterruptedException | ELEvalException e) {
                LOG.error("Can't submit mapreduce job", e);
                errorRecordHandler.onError(
                        new OnRecordErrorException(record, MapReduceErrors.MAPREDUCE_0005, e.getMessage(), e));
            }

            if (job != null) {
                MapReduceExecutorEvents.JOB_CREATED.create(getContext())
                        .with("tracking-url", job.getTrackingURL())
                        .with("job-id", job.getJobID().toString())
                        .createAndSend();
            }
        }
    }