List of usage examples for org.apache.hadoop.conf.Configuration.set
public void set(String name, String value)
Set the value of the name property.
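As a quick orientation before the project examples, here is a minimal, self-contained sketch of the call itself; the property name "my.custom.property" is an arbitrary placeholder rather than a real Hadoop key:

import org.apache.hadoop.conf.Configuration;

public class ConfigurationSetExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // set(name, value) stores the pair in this Configuration instance only
    conf.set("my.custom.property", "some-value");
    // get(name) returns the value just set, or null for an unset property
    System.out.println(conf.get("my.custom.property"));
    // get(name, defaultValue) falls back to the default when the property is absent
    System.out.println(conf.get("missing.property", "default-value"));
  }
}

The setting is local to that Configuration instance; it does not modify any *-site.xml files on disk.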
From source file: co.cask.cdap.internal.app.runtime.spark.ExecutionSparkContext.java
License: Apache License
@Override
public <T> void writeToDataset(T rdd, String datasetName, Class<?> kClass, Class<?> vClass,
                               Map<String, String> userDsArgs) {
  // Clone the configuration since it's dataset specific and shouldn't affect the global hConf
  Configuration configuration = new Configuration(hConf);

  // First try if it is an OutputFormatProvider
  Map<String, String> dsArgs = RuntimeArguments.extractScope(Scope.DATASET, datasetName, getRuntimeArguments());
  dsArgs.putAll(userDsArgs);
  Dataset dataset = instantiateDataset(datasetName, dsArgs);
  try {
    if (dataset instanceof OutputFormatProvider) {
      // Get the output format and its configuration from the dataset
      String outputFormatName = ((OutputFormatProvider) dataset).getOutputFormatClassName();

      // Load the output format class
      if (outputFormatName == null) {
        throw new DatasetInstantiationException(String.format(
          "Dataset '%s' provided null as the output format class name", datasetName));
      }
      Class<? extends OutputFormat> outputFormatClass;
      try {
        @SuppressWarnings("unchecked")
        Class<? extends OutputFormat> ofClass =
          (Class<? extends OutputFormat>) SparkClassLoader.findFromContext().loadClass(outputFormatName);
        outputFormatClass = ofClass;
        Map<String, String> outputConfig = ((OutputFormatProvider) dataset).getOutputFormatConfiguration();
        if (outputConfig != null) {
          for (Map.Entry<String, String> entry : outputConfig.entrySet()) {
            configuration.set(entry.getKey(), entry.getValue());
          }
        }
      } catch (ClassNotFoundException e) {
        throw new DatasetInstantiationException(String.format(
          "Cannot load output format class %s provided by dataset '%s'", outputFormatName, datasetName), e);
      } catch (ClassCastException e) {
        throw new DatasetInstantiationException(String.format(
          "Output format class %s provided by dataset '%s' is not an output format",
          outputFormatName, datasetName), e);
      }

      try {
        getSparkFacade().saveAsDataset(rdd, outputFormatClass, kClass, vClass, configuration);
      } catch (Throwable t) {
        // Whatever went wrong, give the dataset a chance to handle the failure
        if (dataset instanceof DatasetOutputCommitter) {
          ((DatasetOutputCommitter) dataset).onFailure();
        }
        throw t;
      }
      if (dataset instanceof DatasetOutputCommitter) {
        ((DatasetOutputCommitter) dataset).onSuccess();
      }
      return;
    }
  } finally {
    commitAndClose(datasetName, dataset);
  }

  // Otherwise it must be supported by SparkDatasetOutputFormat
  SparkDatasetOutputFormat.setDataset(hConf, datasetName, dsArgs);
  getSparkFacade().saveAsDataset(rdd, SparkDatasetOutputFormat.class, kClass, vClass, new Configuration(hConf));
}
From source file: co.cask.cdap.internal.app.runtime.spark.JavaSparkFacade.java
License: Apache License
@SuppressWarnings("unchecked") @Override//from www.j a v a 2 s.c om public <R, K, V> R createRDD(Class<? extends InputFormat> inputFormatClass, Class<K> keyClass, Class<V> valueClass, Configuration hConf) { hConf.set(MRJobConfig.INPUT_FORMAT_CLASS_ATTR, inputFormatClass.getName()); return (R) sparkContext.newAPIHadoopRDD(hConf, inputFormatClass, keyClass, valueClass); }
From source file: co.cask.cdap.internal.app.runtime.spark.JavaSparkFacade.java
License: Apache License
@SuppressWarnings("unchecked") @Override//ww w .j a v a2s .c o m public <R, K, V> void saveAsDataset(R rdd, Class<? extends OutputFormat> outputFormatClass, Class<K> keyClass, Class<V> valueClass, Configuration hConf) { Preconditions.checkArgument(rdd instanceof JavaPairRDD, "RDD class %s is not a subclass of %s", rdd.getClass().getName(), JavaPairRDD.class.getName()); hConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, outputFormatClass.getName()); ((JavaPairRDD<K, V>) rdd).saveAsNewAPIHadoopDataset(hConf); }
From source file: co.cask.cdap.internal.app.runtime.spark.ScalaSparkFacade.java
License: Apache License
@SuppressWarnings("unchecked") @Override//from w w w . ja va 2s. c om public <R, K, V> void saveAsDataset(R rdd, Class<? extends OutputFormat> outputFormatClass, Class<K> keyClass, Class<V> valueClass, Configuration hConf) { Preconditions.checkArgument(rdd instanceof RDD, "RDD class %s is not a subclass of %s", rdd.getClass().getName(), RDD.class.getName()); hConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, outputFormatClass.getName()); ClassTag<K> kClassTag = ClassTag$.MODULE$.apply(keyClass); ClassTag<V> vClassTag = ClassTag$.MODULE$.apply(valueClass); PairRDDFunctions<K, V> pairRDD = new PairRDDFunctions<K, V>((RDD<Tuple2<K, V>>) rdd, kClassTag, vClassTag, null); pairRDD.saveAsNewAPIHadoopDataset(hConf); }
From source file: co.cask.cdap.operations.hdfs.HDFSOperationalStatsTest.java
License: Apache License
@BeforeClass
public static void setup() throws IOException {
  Configuration hConf = new Configuration();
  hConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TMP_FOLDER.newFolder().getAbsolutePath());
  dfsCluster = new MiniDFSCluster.Builder(hConf).numDataNodes(2).build();
  dfsCluster.waitClusterUp();
}
From source file: co.cask.cdap.operations.yarn.YarnRMHAOperationalStatsTest.java
License: Apache License
@Override
protected MiniYARNCluster createYarnCluster() throws IOException, InterruptedException, YarnException {
  Configuration hConf = new Configuration();
  hConf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS, true);
  String hostname = MiniYARNCluster.getHostname();
  for (String confKey : YarnConfiguration.RM_SERVICES_ADDRESS_CONF_KEYS) {
    hConf.set(HAUtil.addSuffix(confKey, "rm0"), hostname + ":" + Networks.getRandomPort());
    hConf.set(HAUtil.addSuffix(confKey, "rm1"), hostname + ":" + Networks.getRandomPort());
  }
  MiniYARNCluster yarnCluster = new MiniYARNCluster(getClass().getName(), 2, 2, 2, 2);
  yarnCluster.init(hConf);
  yarnCluster.start();
  yarnCluster.getResourceManager(0).getRMContext().getRMAdminService().transitionToActive(
    new HAServiceProtocol.StateChangeRequestInfo(HAServiceProtocol.RequestSource.REQUEST_BY_USER));
  return yarnCluster;
}
From source file: co.cask.cdap.security.impersonation.UGIProviderTest.java
License: Apache License
@BeforeClass
public static void init() throws Exception {
  cConf = CConfiguration.create();
  cConf.set(Constants.CFG_LOCAL_DATA_DIR, TEMP_FOLDER.newFolder().getAbsolutePath());

  // Start KDC
  miniKdc = new MiniKdc(MiniKdc.createConf(), TEMP_FOLDER.newFolder());
  miniKdc.start();
  System.setProperty("java.security.krb5.conf", miniKdc.getKrb5conf().getAbsolutePath());

  // Generate keytab
  keytabFile = TEMP_FOLDER.newFile();
  miniKdc.createPrincipal(keytabFile, "hdfs", "alice", "bob");

  // Start mini DFS cluster
  Configuration hConf = new Configuration();
  hConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TEMP_FOLDER.newFolder().getAbsolutePath());
  hConf.setBoolean("ipc.client.fallback-to-simple-auth-allowed", true);
  miniDFSCluster = new MiniDFSCluster.Builder(hConf).numDataNodes(1).build();
  miniDFSCluster.waitClusterUp();
  locationFactory = new FileContextLocationFactory(miniDFSCluster.getFileSystem().getConf());

  hConf = new Configuration();
  hConf.set("hadoop.security.authentication", "kerberos");
  UserGroupInformation.setConfiguration(hConf);
}
From source file: co.cask.cdap.StandaloneMain.java
License: Apache License
public static StandaloneMain create(CConfiguration cConf, Configuration hConf) {
  // This is needed to use LocalJobRunner with fixes (we have it in app-fabric),
  // i.e. for the modified local job runner
  hConf.addResource("mapred-site-local.xml");
  hConf.reloadConfiguration();

  // Due to the design of the Limits class, once it is initialized, it keeps its settings. We
  // want to make sure it uses our settings in this hConf, so we have to force it to initialize here
  // before someone else initializes it.
  Limits.init(hConf);

  File localDataDir = new File(cConf.get(Constants.CFG_LOCAL_DATA_DIR));
  hConf.set(Constants.CFG_LOCAL_DATA_DIR, localDataDir.getAbsolutePath());
  hConf.set(Constants.AppFabric.OUTPUT_DIR, cConf.get(Constants.AppFabric.OUTPUT_DIR));
  hConf.set("hadoop.tmp.dir", new File(localDataDir, cConf.get(Constants.AppFabric.TEMP_DIR)).getAbsolutePath());

  // Windows specific requirements
  if (OSDetector.isWindows()) {
    // Not set anywhere by the project; expected to be set from IDEs if running from the project instead of the SDK.
    // hadoop.dll is at cdap-unit-test\src\main\resources\hadoop.dll
    String hadoopDLLPath = System.getProperty("hadoop.dll.path");
    if (hadoopDLLPath != null) {
      System.load(hadoopDLLPath);
    } else {
      // This is where it is when the standalone SDK is built
      String userDir = System.getProperty("user.dir");
      System.load(Joiner.on(File.separator).join(userDir, "lib", "native", "hadoop.dll"));
    }
  }

  // Run dataset service on random port
  List<Module> modules = createPersistentModules(cConf, hConf);

  return new StandaloneMain(modules, cConf);
}
From source file: co.cask.cdap.template.etl.batch.ETLMapReduceTest.java
License: Apache License
@Test
public void testS3toTPFS() throws Exception {
  String testPath = "s3n://test/2015-06-17-00-00-00.txt";
  String testData = "Sample data for testing.";

  S3NInMemoryFileSystem fs = new S3NInMemoryFileSystem();
  Configuration conf = new Configuration();
  conf.set("fs.s3n.impl", S3NInMemoryFileSystem.class.getName());
  fs.initialize(URI.create("s3n://test/"), conf);
  fs.createNewFile(new Path(testPath));

  FSDataOutputStream writeData = fs.create(new Path(testPath));
  writeData.write(testData.getBytes());
  writeData.flush();
  writeData.close();

  Method method = FileSystem.class.getDeclaredMethod("addFileSystemForTesting",
    new Class[] { URI.class, Configuration.class, FileSystem.class });
  method.setAccessible(true);
  method.invoke(FileSystem.class, URI.create("s3n://test/"), conf, fs);

  ETLStage source = new ETLStage("S3",
    ImmutableMap.<String, String>builder()
      .put(Properties.S3.ACCESS_KEY, "key")
      .put(Properties.S3.ACCESS_ID, "ID")
      .put(Properties.S3.PATH, testPath)
      .build());
  ETLStage sink = new ETLStage("TPFSAvro",
    ImmutableMap.of(Properties.TimePartitionedFileSetDataset.SCHEMA, FileBatchSource.DEFAULT_SCHEMA.toString(),
                    Properties.TimePartitionedFileSetDataset.TPFS_NAME, "TPFSsink"));
  ETLBatchConfig etlConfig = new ETLBatchConfig("* * * * *", source, sink, Lists.<ETLStage>newArrayList());
  AdapterConfig adapterConfig = new AdapterConfig("", TEMPLATE_ID.getId(), GSON.toJsonTree(etlConfig));
  Id.Adapter adapterId = Id.Adapter.from(NAMESPACE, "testS3Adapter");
  AdapterManager manager = createAdapter(adapterId, adapterConfig);

  manager.start();
  manager.waitForOneRunToFinish(2, TimeUnit.MINUTES);
  manager.stop();

  DataSetManager<TimePartitionedFileSet> fileSetManager = getDataset("TPFSsink");
  TimePartitionedFileSet fileSet = fileSetManager.get();
  List<GenericRecord> records = readOutput(fileSet, FileBatchSource.DEFAULT_SCHEMA);
  Assert.assertEquals(1, records.size());
  Assert.assertEquals(testData, records.get(0).get("body").toString());
  fileSet.close();
}
From source file: co.cask.cdap.template.etl.batch.source.FileBatchSource.java
License: Apache License
@Override
public void prepareRun(BatchSourceContext context) throws Exception {
  // SimpleDateFormat needs to be local because it is not threadsafe
  SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd-HH");

  // Calculate the date one hour ago, rounded down to the nearest hour
  prevHour = new Date(context.getLogicalStartTime() - TimeUnit.HOURS.toMillis(1));
  Calendar cal = Calendar.getInstance();
  cal.setTime(prevHour);
  cal.set(Calendar.MINUTE, 0);
  cal.set(Calendar.SECOND, 0);
  cal.set(Calendar.MILLISECOND, 0);
  prevHour = cal.getTime();

  Job job = context.getHadoopJob();
  Configuration conf = job.getConfiguration();
  if (config.fileSystemProperties != null) {
    Map<String, String> properties = GSON.fromJson(config.fileSystemProperties, MAP_STRING_STRING_TYPE);
    for (Map.Entry<String, String> entry : properties.entrySet()) {
      conf.set(entry.getKey(), entry.getValue());
    }
  }
  if (config.fileRegex != null) {
    conf.set(INPUT_REGEX_CONFIG, config.fileRegex);
  }
  conf.set(INPUT_NAME_CONFIG, config.path);

  if (config.timeTable != null) {
    table = context.getDataset(config.timeTable);
    datesToRead = Bytes.toString(table.read(LAST_TIME_READ));
    if (datesToRead == null) {
      List<Date> firstRun = Lists.newArrayList(new Date(0));
      datesToRead = GSON.toJson(firstRun, ARRAYLIST_DATE_TYPE);
    }
    List<Date> attempted = Lists.newArrayList(prevHour);
    String updatedDatesToRead = GSON.toJson(attempted, ARRAYLIST_DATE_TYPE);
    if (!updatedDatesToRead.equals(datesToRead)) {
      table.write(LAST_TIME_READ, updatedDatesToRead);
    }
    conf.set(LAST_TIME_READ, datesToRead);
  }

  conf.set(CUTOFF_READ_TIME, dateFormat.format(prevHour));

  if (!Strings.isNullOrEmpty(config.inputFormatClass)) {
    ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
    Class<? extends FileInputFormat> classType =
      (Class<? extends FileInputFormat>) classLoader.loadClass(config.inputFormatClass);
    job.setInputFormatClass(classType);
  } else {
    job.setInputFormatClass(CombineTextInputFormat.class);
  }
  FileInputFormat.setInputPathFilter(job, BatchFileFilter.class);
  FileInputFormat.addInputPath(job, new Path(config.path));

  long maxSplitSize;
  try {
    maxSplitSize = Long.parseLong(config.maxSplitSize);
  } catch (NumberFormatException e) {
    maxSplitSize = DEFAULT_SPLIT_SIZE;
  }
  CombineTextInputFormat.setMaxInputSplitSize(job, maxSplitSize);
}