Example usage for org.apache.hadoop.conf Configuration getStringCollection

List of usage examples for org.apache.hadoop.conf Configuration getStringCollection

Introduction

On this page you can find example usage for org.apache.hadoop.conf Configuration getStringCollection.

Prototype

public Collection<String> getStringCollection(String name) 

Document

Get the comma-delimited values of the name property as a collection of Strings.
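
The snippet below is a minimal, self-contained sketch of that behavior; the property name "example.servers" and its values are hypothetical and used only for illustration.

import java.util.Collection;
import org.apache.hadoop.conf.Configuration;

public class GetStringCollectionDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Hypothetical comma-delimited property, for illustration only.
        conf.set("example.servers", "host1:9200,host2:9200,host3:9200");

        // The raw value is split on commas and returned as a collection.
        Collection<String> servers = conf.getStringCollection("example.servers");
        for (String server : servers) {
            System.out.println(server); // host1:9200, host2:9200, host3:9200
        }

        // An unset property yields an empty collection rather than null.
        System.out.println(conf.getStringCollection("example.missing").isEmpty()); // true
    }
}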

Usage

From source file:org.apache.kudu.mapreduce.KuduTableMapReduceUtil.java

License:Apache License

/**
 * Add the jars containing the given classes to the job's configuration
 * such that JobClient will ship them to the cluster and add them to
 * the DistributedCache.
 */
public static void addDependencyJars(Configuration conf, Class<?>... classes) throws IOException {

    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();
    // Add jars that are already in the tmpjars variable
    jars.addAll(conf.getStringCollection("tmpjars"));

    // add jars as we find them to a map of contents jar name so that we can avoid
    // creating new jars for classes that have already been packaged.
    Map<String, String> packagedClasses = new HashMap<String, String>();

    // Add jars containing the specified classes
    for (Class<?> clazz : classes) {
        if (clazz == null) {
            continue;
        }

        Path path = findOrCreateJar(clazz, localFs, packagedClasses);
        if (path == null) {
            LOG.warn("Could not find jar for class " + clazz + " in order to ship it to the cluster.");
            continue;
        }
        if (!localFs.exists(path)) {
            LOG.warn("Could not validate jar file " + path + " for class " + clazz);
            continue;
        }
        jars.add(path.toString());
    }
    if (jars.isEmpty()) {
        return;
    }

    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[jars.size()])));
}
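
A typical call site, sketched below with hypothetical MyMapper and MyReducer classes (not part of the Kudu source above), passes the job's Configuration plus the user classes whose jars must ship with the job; the "tmpjars" value built by addDependencyJars is the same comma-delimited list that getStringCollection("tmpjars") reads back.

// Hypothetical driver fragment; MyMapper and MyReducer are placeholders.
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "kudu-example");
KuduTableMapReduceUtil.addDependencyJars(job.getConfiguration(), MyMapper.class, MyReducer.class);

// The shipped jars are now visible as the comma-delimited "tmpjars" property.
for (String jar : job.getConfiguration().getStringCollection("tmpjars")) {
    System.out.println(jar);
}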

From source file:org.apache.lens.driver.es.client.jest.JestClientImpl.java

License:Apache License

public JestClientImpl(ESDriverConfig esDriverConfig, Configuration conf) {
    super(esDriverConfig, conf);
    final JestClientFactory factory = new JestClientFactory();
    factory.setHttpClientConfig(
            new HttpClientConfig.Builder(Validate.notNull(conf.getStringCollection(ES_SERVERS)))
                    .maxTotalConnection(conf.getInt(MAX_TOTAL_CONN, DEFAULT_MAX_CONN))
                    .multiThreaded(conf.getBoolean(IS_MULTITHREADED, DEFAULT_MULTI_THREADED))
                    .readTimeout(esDriverConfig.getQueryTimeOutMs()).build());
    client = factory.getObject();
}

From source file:org.apache.oozie.action.hadoop.LauncherHelper.java

License:Apache License

public static void setupLauncherInfo(Configuration launcherConf, String jobId, String actionId, Path actionDir,
        String recoveryId, Configuration actionConf, String prepareXML)
        throws IOException, HadoopAccessorException {

    launcherConf.set(LauncherAMUtils.OOZIE_JOB_ID, jobId);
    launcherConf.set(LauncherAMUtils.OOZIE_ACTION_ID, actionId);
    launcherConf.set(LauncherAMUtils.OOZIE_ACTION_DIR_PATH, actionDir.toString());
    launcherConf.set(LauncherAMUtils.OOZIE_ACTION_RECOVERY_ID, recoveryId);
    launcherConf.set(LauncherAMUtils.ACTION_PREPARE_XML, prepareXML);

    actionConf.set(LauncherAMUtils.OOZIE_JOB_ID, jobId);
    actionConf.set(LauncherAMUtils.OOZIE_ACTION_ID, actionId);

    if (Services.get().getConf().getBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache",
            false)) {
        List<String> purgedEntries = new ArrayList<>();
        Collection<String> entries = actionConf.getStringCollection("mapreduce.job.cache.files");
        for (String entry : entries) {
            if (entry.contains("#")) {
                purgedEntries.add(entry);
            }
        }
        actionConf.setStrings("mapreduce.job.cache.files",
                purgedEntries.toArray(new String[purgedEntries.size()]));
        launcherConf.setBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache", true);
    }
}
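
The workaround above keeps only distributed-cache entries whose URIs carry a "#fragment" (the fragment names the symlink created in the task's working directory) and drops the rest. A minimal sketch of the same getStringCollection/setStrings round-trip, using made-up URIs, looks like this:

// Made-up cache entries for illustration; "#alias" names the symlink to create.
Configuration actionConf = new Configuration();
actionConf.set("mapreduce.job.cache.files",
        "hdfs:///tmp/a.jar#a.jar,hdfs:///tmp/b.txt,hdfs:///tmp/c.zip#c");

List<String> keep = new ArrayList<>();
for (String entry : actionConf.getStringCollection("mapreduce.job.cache.files")) {
    if (entry.contains("#")) { // keep only entries with an explicit fragment
        keep.add(entry);
    }
}
actionConf.setStrings("mapreduce.job.cache.files", keep.toArray(new String[0]));
// Result: hdfs:///tmp/a.jar#a.jar,hdfs:///tmp/c.zip#c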

From source file:org.apache.oozie.action.hadoop.LauncherMapperHelper.java

License:Apache License

public static void setupLauncherInfo(JobConf launcherConf, String jobId, String actionId, Path actionDir,
        String recoveryId, Configuration actionConf, String prepareXML)
        throws IOException, HadoopAccessorException {

    launcherConf.setMapperClass(LauncherMapper.class);
    launcherConf.setSpeculativeExecution(false);
    launcherConf.setNumMapTasks(1);
    launcherConf.setNumReduceTasks(0);

    launcherConf.set(LauncherMapper.OOZIE_JOB_ID, jobId);
    launcherConf.set(LauncherMapper.OOZIE_ACTION_ID, actionId);
    launcherConf.set(LauncherMapper.OOZIE_ACTION_DIR_PATH, actionDir.toString());
    launcherConf.set(LauncherMapper.OOZIE_ACTION_RECOVERY_ID, recoveryId);
    launcherConf.set(LauncherMapper.ACTION_PREPARE_XML, prepareXML);

    actionConf.set(LauncherMapper.OOZIE_JOB_ID, jobId);
    actionConf.set(LauncherMapper.OOZIE_ACTION_ID, actionId);

    if (Services.get().getConf().getBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache",
            false)) {
        List<String> purgedEntries = new ArrayList<String>();
        Collection<String> entries = actionConf.getStringCollection("mapreduce.job.cache.files");
        for (String entry : entries) {
            if (entry.contains("#")) {
                purgedEntries.add(entry);
            }
        }
        actionConf.setStrings("mapreduce.job.cache.files",
                purgedEntries.toArray(new String[purgedEntries.size()]));
        launcherConf.setBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache", true);
    }

    FileSystem fs = Services.get().get(HadoopAccessorService.class)
            .createFileSystem(launcherConf.get("user.name"), actionDir.toUri(), launcherConf);
    fs.mkdirs(actionDir);

    OutputStream os = fs.create(new Path(actionDir, LauncherMapper.ACTION_CONF_XML));
    try {
        actionConf.writeXml(os);
    } finally {
        IOUtils.closeSafely(os);
    }

    launcherConf.setInputFormat(OozieLauncherInputFormat.class);
    launcherConf.set("mapred.output.dir", new Path(actionDir, "output").toString());
}

From source file:org.apache.pig.backend.hadoop.accumulo.Utils.java

License:Apache License

public static void addDependencyJars(Configuration conf, Class<?>... classes) throws IOException {

    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();
    // Add jars that are already in the tmpjars variable
    jars.addAll(conf.getStringCollection("tmpjars"));

    // add jars as we find them to a map of contents jar name so that we can
    // avoid
    // creating new jars for classes that have already been packaged.
    Map<String, String> packagedClasses = new HashMap<String, String>();

    // Add jars containing the specified classes
    for (Class<?> clazz : classes) {
        if (clazz == null)
            continue;

        Path path = findOrCreateJar(clazz, localFs, packagedClasses);
        if (path == null) {
            log.warn("Could not find jar for class " + clazz + " in order to ship it to the cluster.");
            continue;
        }
        if (!localFs.exists(path)) {
            log.warn("Could not validate jar file " + path + " for class " + clazz);
            continue;
        }
        jars.add(path.toString());
    }
    if (jars.isEmpty())
        return;

    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[jars.size()])));
}

From source file:org.apache.pig.impl.util.JarManager.java

License:Apache License

/**
 * Add the jars containing the given classes to the job's configuration
 * such that JobClient will ship them to the cluster and add them to
 * the DistributedCache
 * 
 * @param job
 *           Job object
 * @param classes
 *            classes to find
 * @throws IOException
 */
public static void addDependencyJars(Job job, Class<?>... classes) throws IOException {
    Configuration conf = job.getConfiguration();
    FileSystem fs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();
    jars.addAll(conf.getStringCollection("tmpjars"));
    addQualifiedJarsName(fs, jars, classes);
    if (jars.isEmpty())
        return;
    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[0])));
}

From source file:org.apache.sqoop.accumulo.AccumuloUtil.java

License:Apache License

/**
 * Add the Accumulo jar files to local classpath and dist cache.
 * @throws IOException
 */
public static void addJars(Job job, SqoopOptions options) throws IOException {

    if (isLocalJobTracker(job)) {
        LOG.info("Not adding Accumulo jars to distributed cache in local mode");
    } else if (options.isSkipDistCache()) {
        LOG.info("Not adding Accumulo jars to distributed cache as requested");
    } else {
        Configuration conf = job.getConfiguration();
        String accumuloHome = null;
        String zookeeperHome = null;
        FileSystem fs = FileSystem.getLocal(conf);
        if (options != null) {
            accumuloHome = options.getAccumuloHome();
        }
        if (accumuloHome == null) {
            accumuloHome = SqoopOptions.getAccumuloHomeDefault();
        }
        LOG.info("Accumulo job : Accumulo Home = " + accumuloHome);
        if (options != null) {
            zookeeperHome = options.getZookeeperHome();
        }
        if (zookeeperHome == null) {
            zookeeperHome = SqoopOptions.getZookeeperHomeDefault();
        }
        LOG.info("Accumulo job : Zookeeper Home = " + zookeeperHome);

        conf.addResource(accumuloHome + AccumuloConstants.ACCUMULO_SITE_XML_PATH);

        // Add any libjars already specified
        Set<String> localUrls = new HashSet<String>();
        localUrls.addAll(conf.getStringCollection(ConfigurationConstants.MAPRED_DISTCACHE_CONF_PARAM));

        String dir = accumuloHome + File.separator + "lib";
        LOG.info("Adding jar files under " + dir + " to distributed cache");
        addDirToCache(new File(dir), fs, localUrls, false);

        dir = zookeeperHome;
        LOG.info("Adding jar files under " + dir + " to distributed cache");
        addDirToCache(new File(dir), fs, localUrls, false);

        String tmpjars = conf.get(ConfigurationConstants.MAPRED_DISTCACHE_CONF_PARAM);
        StringBuilder sb = new StringBuilder(1024);
        if (null != tmpjars) {
            sb.append(tmpjars);
            sb.append(",");
        }
        sb.append(StringUtils.arrayToString(localUrls.toArray(new String[0])));
        conf.set(ConfigurationConstants.MAPRED_DISTCACHE_CONF_PARAM, sb.toString());
    }
}
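
After addJars returns, the distributed-cache parameter named by ConfigurationConstants.MAPRED_DISTCACHE_CONF_PARAM holds one long comma-delimited string; reading it back with getStringCollection is a convenient way to inspect what will be shipped. The following is a hypothetical post-condition check, not part of the Sqoop source.

// Hypothetical check after AccumuloUtil.addJars(job, options) has run.
Collection<String> shipped = job.getConfiguration()
        .getStringCollection(ConfigurationConstants.MAPRED_DISTCACHE_CONF_PARAM);
for (String jar : shipped) {
    System.out.println("Will ship to distributed cache: " + jar);
}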

From source file:org.apache.sqoop.mapreduce.hcat.SqoopHCatUtilities.java

License:Apache License

/**
 * Add the Hive and HCatalog jar files to local classpath and dist cache.
 * @throws IOException
 */
public static void addJars(Job job, SqoopOptions options) throws IOException {

    if (isLocalJobTracker(job)) {
        LOG.info("Not adding hcatalog jars to distributed cache in local mode");
        return;
    }
    if (options.isSkipDistCache()) {
        LOG.info("Not adding hcatalog jars to distributed cache as requested");
        return;
    }
    Configuration conf = job.getConfiguration();
    String hiveHome = null;
    String hCatHome = null;
    FileSystem fs = FileSystem.getLocal(conf);
    if (options != null) {
        hiveHome = options.getHiveHome();
    }
    if (hiveHome == null) {
        hiveHome = SqoopOptions.getHiveHomeDefault();
    }
    if (options != null) {
        hCatHome = options.getHCatHome();
    }
    if (hCatHome == null) {
        hCatHome = SqoopOptions.getHCatHomeDefault();
    }
    LOG.info("HCatalog job : Hive Home = " + hiveHome);
    LOG.info("HCatalog job:  HCatalog Home = " + hCatHome);

    conf.addResource(hiveHome + HIVESITEXMLPATH);

    // Add these to the 'tmpjars' array, which the MR JobSubmitter
    // will upload to HDFS and put in the DistributedCache libjars.
    List<String> libDirs = new ArrayList<String>();
    libDirs.add(hCatHome + File.separator + HCATSHAREDIR);
    libDirs.add(hCatHome + File.separator + DEFLIBDIR);
    libDirs.add(hiveHome + File.separator + DEFLIBDIR);
    Set<String> localUrls = new HashSet<String>();
    // Add any libjars already specified
    localUrls.addAll(conf.getStringCollection(ConfigurationConstants.MAPRED_DISTCACHE_CONF_PARAM));
    for (String dir : libDirs) {
        LOG.info("Adding jar files under " + dir + " to distributed cache");
        addDirToCache(new File(dir), fs, localUrls, false);
    }

    // Recursively add all hcatalog storage handler jars
    // The HBase storage handler is getting deprecated post Hive+HCat merge
    String hCatStorageHandlerDir = hCatHome + File.separator + "share/hcatalog/storage-handlers";
    LOG.info("Adding jar files under " + hCatStorageHandlerDir + " to distributed cache (recursively)");

    addDirToCache(new File(hCatStorageHandlerDir), fs, localUrls, true);

    String tmpjars = conf.get(ConfigurationConstants.MAPRED_DISTCACHE_CONF_PARAM);
    StringBuilder sb = new StringBuilder(1024);
    if (null != tmpjars) {
        sb.append(tmpjars);
        sb.append(",");
    }
    sb.append(StringUtils.arrayToString(localUrls.toArray(new String[0])));
    conf.set(ConfigurationConstants.MAPRED_DISTCACHE_CONF_PARAM, sb.toString());
}

From source file:org.apache.tez.runtime.common.resources.TestWeightedScalingMemoryDistributor.java

License:Apache License

@Test(timeout = 5000)
public void testSimpleWeightedScaling() {
    Configuration conf = new Configuration(this.conf);
    conf.setStrings(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS,
            WeightedScalingMemoryDistributor.generateWeightStrings(0, 1, 2, 3, 1, 1));
    System.err.println(Joiner.on(",")
            .join(conf.getStringCollection(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS)));

    MemoryDistributor dist = new MemoryDistributor(2, 2, conf);

    dist.setJvmMemory(10000l);

    // First request - ScatterGatherShuffleInput
    MemoryUpdateCallbackForTest e1Callback = new MemoryUpdateCallbackForTest();
    InputContext e1InputContext1 = createTestInputContext();
    InputDescriptor e1InDesc1 = createTestInputDescriptor(OrderedGroupedKVInput.class);
    dist.requestMemory(10000, e1Callback, e1InputContext1, e1InDesc1);

    // Second request - BroadcastInput
    MemoryUpdateCallbackForTest e2Callback = new MemoryUpdateCallbackForTest();
    InputContext e2InputContext2 = createTestInputContext();
    InputDescriptor e2InDesc2 = createTestInputDescriptor(UnorderedKVInput.class);
    dist.requestMemory(10000, e2Callback, e2InputContext2, e2InDesc2);

    // Third request - randomOutput (simulates MROutput)
    MemoryUpdateCallbackForTest e3Callback = new MemoryUpdateCallbackForTest();
    OutputContext e3OutputContext1 = createTestOutputContext();
    OutputDescriptor e3OutDesc1 = createTestOutputDescriptor();
    dist.requestMemory(10000, e3Callback, e3OutputContext1, e3OutDesc1);

    // Fourth request - OnFileSortedOutput
    MemoryUpdateCallbackForTest e4Callback = new MemoryUpdateCallbackForTest();
    OutputContext e4OutputContext2 = createTestOutputContext();
    OutputDescriptor e4OutDesc2 = createTestOutputDescriptor(OrderedPartitionedKVOutput.class);
    dist.requestMemory(10000, e4Callback, e4OutputContext2, e4OutDesc2);

    dist.makeInitialAllocations();

    // Total available: 70% of 10K = 7000
    // 4 requests (weight) - 10K (3), 10K(1), 10K(1), 10K(2)
    // Scale down to - 3000, 1000, 1000, 2000
    assertEquals(3000, e1Callback.assigned);
    assertEquals(1000, e2Callback.assigned);
    assertEquals(1000, e3Callback.assigned);
    assertEquals(2000, e4Callback.assigned);
}
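
A worked version of the arithmetic in the final comment: the test registers four requests with weights 3, 1, 1 and 2 (total 7); per the comment, 70% of the 10000-byte JVM budget (7000) is available for scaling, so each request receives weight/7 of 7000, i.e. 3000, 1000, 1000 and 2000, which is exactly what the assertions check.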

From source file:org.cloudgraph.hbase.mapreduce.GraphMapReduceSetup.java

License:Apache License

/**
 * Add the jars containing the given classes to the job's configuration such
 * that JobClient will ship them to the cluster and add them to the
 * DistributedCache.
 */
public static void addDependencyJars(Configuration conf, Class<?>... classes) throws IOException {

    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();
    // Add jars that are already in the tmpjars variable
    jars.addAll(conf.getStringCollection("tmpjars"));

    // add jars as we find them to a map of contents jar name so that we can
    // avoid
    // creating new jars for classes that have already been packaged.
    Map<String, String> packagedClasses = new HashMap<String, String>();

    // Add jars containing the specified classes
    for (Class<?> clazz : classes) {
        if (clazz == null)
            continue;

        Path path = findOrCreateJar(clazz, localFs, packagedClasses);
        if (path == null) {
            LOG.warn("Could not find jar for class " + clazz + " in order to ship it to the cluster.");
            continue;
        }
        if (!localFs.exists(path)) {
            LOG.warn("Could not validate jar file " + path + " for class " + clazz);
            continue;
        }
        jars.add(path.toString());
    }
    if (jars.isEmpty())
        return;

    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[0])));
}