Example usage for org.apache.hadoop.conf.Configuration.set

Introduction

This page lists usage examples for org.apache.hadoop.conf.Configuration.set.

Prototype

public void set(String name, String value) 

Document

Set the value of the name property.
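For orientation, here is a minimal, self-contained sketch of the call before the scraped usages below. The class name, property name, and value are illustrative only, not taken from the examples that follow.

import org.apache.hadoop.conf.Configuration;

public class ConfigurationSetExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // set(name, value) stores the property in this Configuration instance
        // (the key and value here are purely illustrative).
        conf.set("fs.defaultFS", "hdfs://localhost:8020");

        // get(name, defaultValue) returns the stored value, or the default if unset.
        System.out.println(conf.get("fs.defaultFS", "file:///"));
    }
}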

Usage

From source file: co.cask.cdap.internal.app.runtime.spark.ExecutionSparkContext.java

License: Apache License

@Override
public <T> void writeToDataset(T rdd, String datasetName, Class<?> kClass, Class<?> vClass,
        Map<String, String> userDsArgs) {
    // Clone the configuration since it's dataset specification and shouldn't affect the global hConf
    Configuration configuration = new Configuration(hConf);

    // first try if it is OutputFormatProvider
    Map<String, String> dsArgs = RuntimeArguments.extractScope(Scope.DATASET, datasetName,
            getRuntimeArguments());
    dsArgs.putAll(userDsArgs);
    Dataset dataset = instantiateDataset(datasetName, dsArgs);
    try {
        if (dataset instanceof OutputFormatProvider) {
            // get the output format and its configuration from the dataset
            String outputFormatName = ((OutputFormatProvider) dataset).getOutputFormatClassName();
            // load the output format class
            if (outputFormatName == null) {
                throw new DatasetInstantiationException(String
                        .format("Dataset '%s' provided null as the output format class name", datasetName));
            }
            Class<? extends OutputFormat> outputFormatClass;
            try {
                @SuppressWarnings("unchecked")
                Class<? extends OutputFormat> ofClass = (Class<? extends OutputFormat>) SparkClassLoader
                        .findFromContext().loadClass(outputFormatName);
                outputFormatClass = ofClass;
                Map<String, String> outputConfig = ((OutputFormatProvider) dataset)
                        .getOutputFormatConfiguration();
                if (outputConfig != null) {
                    for (Map.Entry<String, String> entry : outputConfig.entrySet()) {
                        configuration.set(entry.getKey(), entry.getValue());
                    }
                }
            } catch (ClassNotFoundException e) {
                throw new DatasetInstantiationException(
                        String.format("Cannot load input format class %s provided by dataset '%s'",
                                outputFormatName, datasetName),
                        e);
            } catch (ClassCastException e) {
                throw new DatasetInstantiationException(
                        String.format("Input format class %s provided by dataset '%s' is not an input format",
                                outputFormatName, datasetName),
                        e);
            }
            try {
                getSparkFacade().saveAsDataset(rdd, outputFormatClass, kClass, vClass, configuration);
            } catch (Throwable t) {
                // whatever went wrong, give the dataset a chance to handle the failure
                if (dataset instanceof DatasetOutputCommitter) {
                    ((DatasetOutputCommitter) dataset).onFailure();
                }
                throw t;
            }
            if (dataset instanceof DatasetOutputCommitter) {
                ((DatasetOutputCommitter) dataset).onSuccess();
            }
            return;
        }
    } finally {
        commitAndClose(datasetName, dataset);
    }

    // it must be supported by SparkDatasetOutputFormat
    SparkDatasetOutputFormat.setDataset(hConf, datasetName, dsArgs);
    getSparkFacade().saveAsDataset(rdd, SparkDatasetOutputFormat.class, kClass, vClass,
            new Configuration(hConf));
}

From source file: co.cask.cdap.internal.app.runtime.spark.JavaSparkFacade.java

License: Apache License

@SuppressWarnings("unchecked")
@Override
public <R, K, V> R createRDD(Class<? extends InputFormat> inputFormatClass, Class<K> keyClass,
        Class<V> valueClass, Configuration hConf) {
    hConf.set(MRJobConfig.INPUT_FORMAT_CLASS_ATTR, inputFormatClass.getName());
    return (R) sparkContext.newAPIHadoopRDD(hConf, inputFormatClass, keyClass, valueClass);
}

From source file: co.cask.cdap.internal.app.runtime.spark.JavaSparkFacade.java

License: Apache License

@SuppressWarnings("unchecked")
@Override
public <R, K, V> void saveAsDataset(R rdd, Class<? extends OutputFormat> outputFormatClass, Class<K> keyClass,
        Class<V> valueClass, Configuration hConf) {
    Preconditions.checkArgument(rdd instanceof JavaPairRDD, "RDD class %s is not a subclass of %s",
            rdd.getClass().getName(), JavaPairRDD.class.getName());
    hConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, outputFormatClass.getName());
    ((JavaPairRDD<K, V>) rdd).saveAsNewAPIHadoopDataset(hConf);
}

From source file: co.cask.cdap.internal.app.runtime.spark.ScalaSparkFacade.java

License: Apache License

@SuppressWarnings("unchecked")
@Override
public <R, K, V> void saveAsDataset(R rdd, Class<? extends OutputFormat> outputFormatClass, Class<K> keyClass,
        Class<V> valueClass, Configuration hConf) {
    Preconditions.checkArgument(rdd instanceof RDD, "RDD class %s is not a subclass of %s",
            rdd.getClass().getName(), RDD.class.getName());

    hConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, outputFormatClass.getName());

    ClassTag<K> kClassTag = ClassTag$.MODULE$.apply(keyClass);
    ClassTag<V> vClassTag = ClassTag$.MODULE$.apply(valueClass);

    PairRDDFunctions<K, V> pairRDD = new PairRDDFunctions<K, V>((RDD<Tuple2<K, V>>) rdd, kClassTag, vClassTag,
            null);
    pairRDD.saveAsNewAPIHadoopDataset(hConf);
}

From source file: co.cask.cdap.operations.hdfs.HDFSOperationalStatsTest.java

License: Apache License

@BeforeClass
public static void setup() throws IOException {
    Configuration hConf = new Configuration();
    hConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TMP_FOLDER.newFolder().getAbsolutePath());
    dfsCluster = new MiniDFSCluster.Builder(hConf).numDataNodes(2).build();
    dfsCluster.waitClusterUp();
}

From source file: co.cask.cdap.operations.yarn.YarnRMHAOperationalStatsTest.java

License: Apache License

@Override
protected MiniYARNCluster createYarnCluster() throws IOException, InterruptedException, YarnException {
    Configuration hConf = new Configuration();
    hConf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS, true);
    String hostname = MiniYARNCluster.getHostname();
    for (String confKey : YarnConfiguration.RM_SERVICES_ADDRESS_CONF_KEYS) {
        hConf.set(HAUtil.addSuffix(confKey, "rm0"), hostname + ":" + Networks.getRandomPort());
        hConf.set(HAUtil.addSuffix(confKey, "rm1"), hostname + ":" + Networks.getRandomPort());
    }
    MiniYARNCluster yarnCluster = new MiniYARNCluster(getClass().getName(), 2, 2, 2, 2);
    yarnCluster.init(hConf);
    yarnCluster.start();
    yarnCluster.getResourceManager(0).getRMContext().getRMAdminService().transitionToActive(
            new HAServiceProtocol.StateChangeRequestInfo(HAServiceProtocol.RequestSource.REQUEST_BY_USER));
    return yarnCluster;
}

From source file: co.cask.cdap.security.impersonation.UGIProviderTest.java

License: Apache License

@BeforeClass
public static void init() throws Exception {
    cConf = CConfiguration.create();
    cConf.set(Constants.CFG_LOCAL_DATA_DIR, TEMP_FOLDER.newFolder().getAbsolutePath());

    // Start KDC
    miniKdc = new MiniKdc(MiniKdc.createConf(), TEMP_FOLDER.newFolder());
    miniKdc.start();
    System.setProperty("java.security.krb5.conf", miniKdc.getKrb5conf().getAbsolutePath());

    // Generate keytab
    keytabFile = TEMP_FOLDER.newFile();
    miniKdc.createPrincipal(keytabFile, "hdfs", "alice", "bob");

    // Start mini DFS cluster
    Configuration hConf = new Configuration();
    hConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TEMP_FOLDER.newFolder().getAbsolutePath());
    hConf.setBoolean("ipc.client.fallback-to-simple-auth-allowed", true);

    miniDFSCluster = new MiniDFSCluster.Builder(hConf).numDataNodes(1).build();
    miniDFSCluster.waitClusterUp();
    locationFactory = new FileContextLocationFactory(miniDFSCluster.getFileSystem().getConf());

    hConf = new Configuration();
    hConf.set("hadoop.security.authentication", "kerberos");
    UserGroupInformation.setConfiguration(hConf);
}

From source file: co.cask.cdap.StandaloneMain.java

License: Apache License

public static StandaloneMain create(CConfiguration cConf, Configuration hConf) {
    // This is needed to use LocalJobRunner with fixes (we have it in app-fabric).
    // For the modified local job runner
    hConf.addResource("mapred-site-local.xml");
    hConf.reloadConfiguration();
    // Due to incredibly stupid design of Limits class, once it is initialized, it keeps its settings. We
    // want to make sure it uses our settings in this hConf, so we have to force it to initialize here
    // before someone else initializes it.
    Limits.init(hConf);

    File localDataDir = new File(cConf.get(Constants.CFG_LOCAL_DATA_DIR));
    hConf.set(Constants.CFG_LOCAL_DATA_DIR, localDataDir.getAbsolutePath());
    hConf.set(Constants.AppFabric.OUTPUT_DIR, cConf.get(Constants.AppFabric.OUTPUT_DIR));
    hConf.set("hadoop.tmp.dir",
            new File(localDataDir, cConf.get(Constants.AppFabric.TEMP_DIR)).getAbsolutePath());

    // Windows specific requirements
    if (OSDetector.isWindows()) {
        // not set anywhere by the project, expected to be set from IDEs if running from the project instead of sdk
        // hadoop.dll is at cdap-unit-test\src\main\resources\hadoop.dll for some reason
        String hadoopDLLPath = System.getProperty("hadoop.dll.path");
        if (hadoopDLLPath != null) {
            System.load(hadoopDLLPath);
        } else {
            // this is where it is when the standalone sdk is built
            String userDir = System.getProperty("user.dir");
            System.load(Joiner.on(File.separator).join(userDir, "lib", "native", "hadoop.dll"));
        }
    }

    //Run dataset service on random port
    List<Module> modules = createPersistentModules(cConf, hConf);

    return new StandaloneMain(modules, cConf);
}

From source file: co.cask.cdap.template.etl.batch.ETLMapReduceTest.java

License: Apache License

@Test
public void testS3toTPFS() throws Exception {
    String testPath = "s3n://test/2015-06-17-00-00-00.txt";
    String testData = "Sample data for testing.";

    S3NInMemoryFileSystem fs = new S3NInMemoryFileSystem();
    Configuration conf = new Configuration();
    conf.set("fs.s3n.impl", S3NInMemoryFileSystem.class.getName());
    fs.initialize(URI.create("s3n://test/"), conf);
    fs.createNewFile(new Path(testPath));

    FSDataOutputStream writeData = fs.create(new Path(testPath));
    writeData.write(testData.getBytes());
    writeData.flush();
    writeData.close();

    Method method = FileSystem.class.getDeclaredMethod("addFileSystemForTesting",
            new Class[] { URI.class, Configuration.class, FileSystem.class });
    method.setAccessible(true);
    method.invoke(FileSystem.class, URI.create("s3n://test/"), conf, fs);
    ETLStage source = new ETLStage("S3",
            ImmutableMap.<String, String>builder().put(Properties.S3.ACCESS_KEY, "key")
                    .put(Properties.S3.ACCESS_ID, "ID").put(Properties.S3.PATH, testPath).build());

    ETLStage sink = new ETLStage("TPFSAvro",
            ImmutableMap.of(Properties.TimePartitionedFileSetDataset.SCHEMA,
                    FileBatchSource.DEFAULT_SCHEMA.toString(),
                    Properties.TimePartitionedFileSetDataset.TPFS_NAME, "TPFSsink"));
    ETLBatchConfig etlConfig = new ETLBatchConfig("* * * * *", source, sink, Lists.<ETLStage>newArrayList());
    AdapterConfig adapterConfig = new AdapterConfig("", TEMPLATE_ID.getId(), GSON.toJsonTree(etlConfig));
    Id.Adapter adapterId = Id.Adapter.from(NAMESPACE, "testS3Adapter");
    AdapterManager manager = createAdapter(adapterId, adapterConfig);

    manager.start();
    manager.waitForOneRunToFinish(2, TimeUnit.MINUTES);
    manager.stop();

    DataSetManager<TimePartitionedFileSet> fileSetManager = getDataset("TPFSsink");
    TimePartitionedFileSet fileSet = fileSetManager.get();
    List<GenericRecord> records = readOutput(fileSet, FileBatchSource.DEFAULT_SCHEMA);
    Assert.assertEquals(1, records.size());
    Assert.assertEquals(testData, records.get(0).get("body").toString());
    fileSet.close();
}

From source file: co.cask.cdap.template.etl.batch.source.FileBatchSource.java

License: Apache License

@Override
public void prepareRun(BatchSourceContext context) throws Exception {
    //SimpleDateFormat needs to be local because it is not threadsafe
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd-HH");

    //calculate date one hour ago, rounded down to the nearest hour
    prevHour = new Date(context.getLogicalStartTime() - TimeUnit.HOURS.toMillis(1));
    Calendar cal = Calendar.getInstance();
    cal.setTime(prevHour);
    cal.set(Calendar.MINUTE, 0);
    cal.set(Calendar.SECOND, 0);
    cal.set(Calendar.MILLISECOND, 0);
    prevHour = cal.getTime();

    Job job = context.getHadoopJob();
    Configuration conf = job.getConfiguration();
    if (config.fileSystemProperties != null) {
        Map<String, String> properties = GSON.fromJson(config.fileSystemProperties, MAP_STRING_STRING_TYPE);
        for (Map.Entry<String, String> entry : properties.entrySet()) {
            conf.set(entry.getKey(), entry.getValue());
        }
    }

    if (config.fileRegex != null) {
        conf.set(INPUT_REGEX_CONFIG, config.fileRegex);
    }
    conf.set(INPUT_NAME_CONFIG, config.path);

    if (config.timeTable != null) {
        table = context.getDataset(config.timeTable);
        datesToRead = Bytes.toString(table.read(LAST_TIME_READ));
        if (datesToRead == null) {
            List<Date> firstRun = Lists.newArrayList(new Date(0));
            datesToRead = GSON.toJson(firstRun, ARRAYLIST_DATE_TYPE);
        }
        List<Date> attempted = Lists.newArrayList(prevHour);
        String updatedDatesToRead = GSON.toJson(attempted, ARRAYLIST_DATE_TYPE);
        if (!updatedDatesToRead.equals(datesToRead)) {
            table.write(LAST_TIME_READ, updatedDatesToRead);
        }
        conf.set(LAST_TIME_READ, datesToRead);
    }

    conf.set(CUTOFF_READ_TIME, dateFormat.format(prevHour));
    if (!Strings.isNullOrEmpty(config.inputFormatClass)) {
        ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
        Class<? extends FileInputFormat> classType = (Class<? extends FileInputFormat>) classLoader
                .loadClass(config.inputFormatClass);
        job.setInputFormatClass(classType);
    } else {
        job.setInputFormatClass(CombineTextInputFormat.class);
    }
    FileInputFormat.setInputPathFilter(job, BatchFileFilter.class);
    FileInputFormat.addInputPath(job, new Path(config.path));
    long maxSplitSize;
    try {
        maxSplitSize = Long.parseLong(config.maxSplitSize);
    } catch (NumberFormatException e) {
        maxSplitSize = DEFAULT_SPLIT_SIZE;
    }
    CombineTextInputFormat.setMaxInputSplitSize(job, maxSplitSize);
}