Example usage for org.apache.hadoop.conf Configuration getStringCollection

List of usage examples for org.apache.hadoop.conf Configuration getStringCollection

Introduction

On this page you can find example usage for org.apache.hadoop.conf Configuration getStringCollection.

Prototype

public Collection<String> getStringCollection(String name) 

Document

Get the comma-delimited values of the name property as a collection of Strings.
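
The snippet below is a minimal, self-contained sketch of that behavior; the property name "example.servers" and its values are hypothetical and used only for illustration.

import java.util.Collection;
import org.apache.hadoop.conf.Configuration;

public class GetStringCollectionDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Hypothetical comma-delimited property, for illustration only.
        conf.set("example.servers", "host1:9200,host2:9200,host3:9200");

        // The raw value is split on commas and returned as a collection.
        Collection<String> servers = conf.getStringCollection("example.servers");
        for (String server : servers) {
            System.out.println(server); // host1:9200, host2:9200, host3:9200
        }

        // An unset property yields an empty collection rather than null.
        System.out.println(conf.getStringCollection("example.missing").isEmpty()); // true
    }
}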

Usage

From source file:org.apache.kudu.mapreduce.KuduTableMapReduceUtil.java

License:Apache License

/**
 * Add the jars containing the given classes to the job's configuration
 * such that JobClient will ship them to the cluster and add them to
 * the DistributedCache.
 */
public static void addDependencyJars(Configuration conf, Class<?>... classes) throws IOException {

    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();
    // Add jars that are already in the tmpjars variable
    jars.addAll(conf.getStringCollection("tmpjars"));

    // add jars as we find them to a map of contents jar name so that we can avoid
    // creating new jars for classes that have already been packaged.
    Map<String, String> packagedClasses = new HashMap<String, String>();

    // Add jars containing the specified classes
    for (Class<?> clazz : classes) {
        if (clazz == null) {
            continue;
        }

        Path path = findOrCreateJar(clazz, localFs, packagedClasses);
        if (path == null) {
            LOG.warn("Could not find jar for class " + clazz + " in order to ship it to the cluster.");
            continue;
        }
        if (!localFs.exists(path)) {
            LOG.warn("Could not validate jar file " + path + " for class " + clazz);
            continue;
        }
        jars.add(path.toString());
    }
    if (jars.isEmpty()) {
        return;
    }

    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[jars.size()])));
}
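
A typical call site, sketched below with hypothetical MyMapper and MyReducer classes (not part of the Kudu source above), passes the job's Configuration plus the user classes whose jars must ship with the job; the "tmpjars" value built by addDependencyJars is the same comma-delimited list that getStringCollection("tmpjars") reads back.

// Hypothetical driver fragment; MyMapper and MyReducer are placeholders.
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "kudu-example");
KuduTableMapReduceUtil.addDependencyJars(job.getConfiguration(), MyMapper.class, MyReducer.class);

// The shipped jars are now visible as the comma-delimited "tmpjars" property.
for (String jar : job.getConfiguration().getStringCollection("tmpjars")) {
    System.out.println(jar);
}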

From source file:org.apache.lens.driver.es.client.jest.JestClientImpl.java

License:Apache License

public JestClientImpl(ESDriverConfig esDriverConfig, Configuration conf) {
    super(esDriverConfig, conf);
    final JestClientFactory factory = new JestClientFactory();
    factory.setHttpClientConfig(
            new HttpClientConfig.Builder(Validate.notNull(conf.getStringCollection(ES_SERVERS)))
                    .maxTotalConnection(conf.getInt(MAX_TOTAL_CONN, DEFAULT_MAX_CONN))
                    .multiThreaded(conf.getBoolean(IS_MULTITHREADED, DEFAULT_MULTI_THREADED))
                    .readTimeout(esDriverConfig.getQueryTimeOutMs()).build());
    client = factory.getObject();
}

From source file:org.apache.oozie.action.hadoop.LauncherHelper.java

License:Apache License

public static void setupLauncherInfo(Configuration launcherConf, String jobId, String actionId, Path actionDir,
        String recoveryId, Configuration actionConf, String prepareXML)
        throws IOException, HadoopAccessorException {

    launcherConf.set(LauncherAMUtils.OOZIE_JOB_ID, jobId);
    launcherConf.set(LauncherAMUtils.OOZIE_ACTION_ID, actionId);
    launcherConf.set(LauncherAMUtils.OOZIE_ACTION_DIR_PATH, actionDir.toString());
    launcherConf.set(LauncherAMUtils.OOZIE_ACTION_RECOVERY_ID, recoveryId);
    launcherConf.set(LauncherAMUtils.ACTION_PREPARE_XML, prepareXML);

    actionConf.set(LauncherAMUtils.OOZIE_JOB_ID, jobId);
    actionConf.set(LauncherAMUtils.OOZIE_ACTION_ID, actionId);

    if (Services.get().getConf().getBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache",
            false)) {
        List<String> purgedEntries = new ArrayList<>();
        Collection<String> entries = actionConf.getStringCollection("mapreduce.job.cache.files");
        for (String entry : entries) {
            if (entry.contains("#")) {
                purgedEntries.add(entry);
            }
        }
        actionConf.setStrings("mapreduce.job.cache.files",
                purgedEntries.toArray(new String[purgedEntries.size()]));
        launcherConf.setBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache", true);
    }
}
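
The workaround above keeps only distributed-cache entries whose URIs carry a "#fragment" (the fragment names the symlink created in the task's working directory) and drops the rest. A minimal sketch of the same getStringCollection/setStrings round-trip, using made-up URIs, looks like this:

// Made-up cache entries for illustration; "#alias" names the symlink to create.
Configuration actionConf = new Configuration();
actionConf.set("mapreduce.job.cache.files",
        "hdfs:///tmp/a.jar#a.jar,hdfs:///tmp/b.txt,hdfs:///tmp/c.zip#c");

List<String> keep = new ArrayList<>();
for (String entry : actionConf.getStringCollection("mapreduce.job.cache.files")) {
    if (entry.contains("#")) { // keep only entries with an explicit fragment
        keep.add(entry);
    }
}
actionConf.setStrings("mapreduce.job.cache.files", keep.toArray(new String[0]));
// Result: hdfs:///tmp/a.jar#a.jar,hdfs:///tmp/c.zip#c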

From source file:org.apache.oozie.action.hadoop.LauncherMapperHelper.java

License:Apache License

public static void setupLauncherInfo(JobConf launcherConf, String jobId, String actionId, Path actionDir,
        String recoveryId, Configuration actionConf, String prepareXML)
        throws IOException, HadoopAccessorException {

    launcherConf.setMapperClass(LauncherMapper.class);
    launcherConf.setSpeculativeExecution(false);
    launcherConf.setNumMapTasks(1);
    launcherConf.setNumReduceTasks(0);

    launcherConf.set(LauncherMapper.OOZIE_JOB_ID, jobId);
    launcherConf.set(LauncherMapper.OOZIE_ACTION_ID, actionId);
    launcherConf.set(LauncherMapper.OOZIE_ACTION_DIR_PATH, actionDir.toString());
    launcherConf.set(LauncherMapper.OOZIE_ACTION_RECOVERY_ID, recoveryId);
    launcherConf.set(LauncherMapper.ACTION_PREPARE_XML, prepareXML);

    actionConf.set(LauncherMapper.OOZIE_JOB_ID, jobId);
    actionConf.set(LauncherMapper.OOZIE_ACTION_ID, actionId);

    if (Services.get().getConf().getBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache",
            false)) {
        List<String> purgedEntries = new ArrayList<String>();
        Collection<String> entries = actionConf.getStringCollection("mapreduce.job.cache.files");
        for (String entry : entries) {
            if (entry.contains("#")) {
                purgedEntries.add(entry);
            }
        }
        actionConf.setStrings("mapreduce.job.cache.files",
                purgedEntries.toArray(new String[purgedEntries.size()]));
        launcherConf.setBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache", true);
    }

    FileSystem fs = Services.get().get(HadoopAccessorService.class)
            .createFileSystem(launcherConf.get("user.name"), actionDir.toUri(), launcherConf);
    fs.mkdirs(actionDir);

    OutputStream os = fs.create(new Path(actionDir, LauncherMapper.ACTION_CONF_XML));
    try {
        actionConf.writeXml(os);
    } finally {
        IOUtils.closeSafely(os);
    }

    launcherConf.setInputFormat(OozieLauncherInputFormat.class);
    launcherConf.set("mapred.output.dir", new Path(actionDir, "output").toString());
}

From source file:org.apache.pig.backend.hadoop.accumulo.Utils.java

License:Apache License

public static void addDependencyJars(Configuration conf, Class<?>... classes) throws IOException {

    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();
    // Add jars that are already in the tmpjars variable
    jars.addAll(conf.getStringCollection("tmpjars"));

    // add jars as we find them to a map of contents jar name so that we can
    // avoid
    // creating new jars for classes that have already been packaged.
    Map<String, String> packagedClasses = new HashMap<String, String>();

    // Add jars containing the specified classes
    for (Class<?> clazz : classes) {
        if (clazz == null)
            continue;

        Path path = findOrCreateJar(clazz, localFs, packagedClasses);
        if (path == null) {
            log.warn("Could not find jar for class " + clazz + " in order to ship it to the cluster.");
            continue;
        }
        if (!localFs.exists(path)) {
            log.warn("Could not validate jar file " + path + " for class " + clazz);
            continue;
        }
        jars.add(path.toString());
    }
    if (jars.isEmpty())
        return;

    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[jars.size()])));
}

From source file:org.apache.pig.impl.util.JarManager.java

License:Apache License

/**
 * Add the jars containing the given classes to the job's configuration
 * such that JobClient will ship them to the cluster and add them to
 * the DistributedCache
 * 
 * @param job
 *           Job object
 * @param classes
 *            classes to find
 * @throws IOException
 */
public static void addDependencyJars(Job job, Class<?>... classes) throws IOException {
    Configuration conf = job.getConfiguration();
    FileSystem fs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();
    jars.addAll(conf.getStringCollection("tmpjars"));
    addQualifiedJarsName(fs, jars, classes);
    if (jars.isEmpty())
        return;
    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[0])));
}

From source file:org.apache.sqoop.accumulo.AccumuloUtil.java

License:Apache License

/**
 * Add the Accumulo jar files to local classpath and dist cache.
 * @throws IOException
 */
public static void addJars(Job job, SqoopOptions options) throws IOException {

    if (isLocalJobTracker(job)) {
        LOG.info("Not adding Accumulo jars to distributed cache in local mode");
    } else if (options.isSkipDistCache()) {
        LOG.info("Not adding Accumulo jars to distributed cache as requested");
    } else {
        Configuration conf = job.getConfiguration();
        String accumuloHome = null;
        String zookeeperHome = null;
        FileSystem fs = FileSystem.getLocal(conf);
        if (options != null) {
            accumuloHome = options.getAccumuloHome();
        }
        if (accumuloHome == null) {
            accumuloHome = SqoopOptions.getAccumuloHomeDefault();
        }
        LOG.info("Accumulo job : Accumulo Home = " + accumuloHome);
        if (options != null) {
            zookeeperHome = options.getZookeeperHome();
        }
        if (zookeeperHome == null) {
            zookeeperHome = SqoopOptions.getZookeeperHomeDefault();
        }
        LOG.info("Accumulo job : Zookeeper Home = " + zookeeperHome);

        conf.addResource(accumuloHome + AccumuloConstants.ACCUMULO_SITE_XML_PATH);

        // Add any libjars already specified
        Set<String> localUrls = new HashSet<String>();
        localUrls.addAll(conf.getStringCollection(ConfigurationConstants.MAPRED_DISTCACHE_CONF_PARAM));

        String dir = accumuloHome + File.separator + "lib";
        LOG.info("Adding jar files under " + dir + " to distributed cache");
        addDirToCache(new File(dir), fs, localUrls, false);

        dir = zookeeperHome;
        LOG.info("Adding jar files under " + dir + " to distributed cache");
        addDirToCache(new File(dir), fs, localUrls, false);

        String tmpjars = conf.get(ConfigurationConstants.MAPRED_DISTCACHE_CONF_PARAM);
        StringBuilder sb = new StringBuilder(1024);
        if (null != tmpjars) {
            sb.append(tmpjars);
            sb.append(",");
        }
        sb.append(StringUtils.arrayToString(localUrls.toArray(new String[0])));
        conf.set(ConfigurationConstants.MAPRED_DISTCACHE_CONF_PARAM, sb.toString());
    }
}
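
After addJars returns, the distributed-cache parameter named by ConfigurationConstants.MAPRED_DISTCACHE_CONF_PARAM holds one long comma-delimited string; reading it back with getStringCollection is a convenient way to inspect what will be shipped. The following is a hypothetical post-condition check, not part of the Sqoop source.

// Hypothetical check after AccumuloUtil.addJars(job, options) has run.
Collection<String> shipped = job.getConfiguration()
        .getStringCollection(ConfigurationConstants.MAPRED_DISTCACHE_CONF_PARAM);
for (String jar : shipped) {
    System.out.println("Will ship to distributed cache: " + jar);
}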

From source file:org.apache.sqoop.mapreduce.hcat.SqoopHCatUtilities.java

License:Apache License

/**
 * Add the Hive and HCatalog jar files to local classpath and dist cache.
 * @throws IOException
 */
public static void addJars(Job job, SqoopOptions options) throws IOException {

    if (isLocalJobTracker(job)) {
        LOG.info("Not adding hcatalog jars to distributed cache in local mode");
        return;
    }
    if (options.isSkipDistCache()) {
        LOG.info("Not adding hcatalog jars to distributed cache as requested");
        return;
    }
    Configuration conf = job.getConfiguration();
    String hiveHome = null;
    String hCatHome = null;
    FileSystem fs = FileSystem.getLocal(conf);
    if (options != null) {
        hiveHome = options.getHiveHome();
    }
    if (hiveHome == null) {
        hiveHome = SqoopOptions.getHiveHomeDefault();
    }
    if (options != null) {
        hCatHome = options.getHCatHome();
    }
    if (hCatHome == null) {
        hCatHome = SqoopOptions.getHCatHomeDefault();
    }
    LOG.info("HCatalog job : Hive Home = " + hiveHome);
    LOG.info("HCatalog job:  HCatalog Home = " + hCatHome);

    conf.addResource(hiveHome + HIVESITEXMLPATH);

    // Add these to the 'tmpjars' array, which the MR JobSubmitter
    // will upload to HDFS and put in the DistributedCache libjars.
    List<String> libDirs = new ArrayList<String>();
    libDirs.add(hCatHome + File.separator + HCATSHAREDIR);
    libDirs.add(hCatHome + File.separator + DEFLIBDIR);
    libDirs.add(hiveHome + File.separator + DEFLIBDIR);
    Set<String> localUrls = new HashSet<String>();
    // Add any libjars already specified
    localUrls.addAll(conf.getStringCollection(ConfigurationConstants.MAPRED_DISTCACHE_CONF_PARAM));
    for (String dir : libDirs) {
        LOG.info("Adding jar files under " + dir + " to distributed cache");
        addDirToCache(new File(dir), fs, localUrls, false);
    }

    // Recursively add all hcatalog storage handler jars
    // The HBase storage handler is getting deprecated post Hive+HCat merge
    String hCatStorageHandlerDir = hCatHome + File.separator + "share/hcatalog/storage-handlers";
    LOG.info("Adding jar files under " + hCatStorageHandlerDir + " to distributed cache (recursively)");

    addDirToCache(new File(hCatStorageHandlerDir), fs, localUrls, true);

    String tmpjars = conf.get(ConfigurationConstants.MAPRED_DISTCACHE_CONF_PARAM);
    StringBuilder sb = new StringBuilder(1024);
    if (null != tmpjars) {
        sb.append(tmpjars);
        sb.append(",");
    }
    sb.append(StringUtils.arrayToString(localUrls.toArray(new String[0])));
    conf.set(ConfigurationConstants.MAPRED_DISTCACHE_CONF_PARAM, sb.toString());
}

From source file:org.apache.tez.runtime.common.resources.TestWeightedScalingMemoryDistributor.java

License:Apache License

@Test(timeout = 5000)
public void testSimpleWeightedScaling() {
    Configuration conf = new Configuration(this.conf);
    conf.setStrings(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS,
            WeightedScalingMemoryDistributor.generateWeightStrings(0, 1, 2, 3, 1, 1));
    System.err.println(Joiner.on(",")
            .join(conf.getStringCollection(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS)));

    MemoryDistributor dist = new MemoryDistributor(2, 2, conf);

    dist.setJvmMemory(10000l);

    // First request - ScatterGatherShuffleInput
    MemoryUpdateCallbackForTest e1Callback = new MemoryUpdateCallbackForTest();
    InputContext e1InputContext1 = createTestInputContext();
    InputDescriptor e1InDesc1 = createTestInputDescriptor(OrderedGroupedKVInput.class);
    dist.requestMemory(10000, e1Callback, e1InputContext1, e1InDesc1);

    // Second request - BroadcastInput
    MemoryUpdateCallbackForTest e2Callback = new MemoryUpdateCallbackForTest();
    InputContext e2InputContext2 = createTestInputContext();
    InputDescriptor e2InDesc2 = createTestInputDescriptor(UnorderedKVInput.class);
    dist.requestMemory(10000, e2Callback, e2InputContext2, e2InDesc2);

    // Third request - randomOutput (simulates MROutput)
    MemoryUpdateCallbackForTest e3Callback = new MemoryUpdateCallbackForTest();
    OutputContext e3OutputContext1 = createTestOutputContext();
    OutputDescriptor e3OutDesc1 = createTestOutputDescriptor();
    dist.requestMemory(10000, e3Callback, e3OutputContext1, e3OutDesc1);

    // Fourth request - OnFileSortedOutput
    MemoryUpdateCallbackForTest e4Callback = new MemoryUpdateCallbackForTest();
    OutputContext e4OutputContext2 = createTestOutputContext();
    OutputDescriptor e4OutDesc2 = createTestOutputDescriptor(OrderedPartitionedKVOutput.class);
    dist.requestMemory(10000, e4Callback, e4OutputContext2, e4OutDesc2);

    dist.makeInitialAllocations();

    // Total available: 70% of 10K = 7000
    // 4 requests (weight) - 10K (3), 10K(1), 10K(1), 10K(2)
    // Scale down to - 3000, 1000, 1000, 2000
    assertEquals(3000, e1Callback.assigned);
    assertEquals(1000, e2Callback.assigned);
    assertEquals(1000, e3Callback.assigned);
    assertEquals(2000, e4Callback.assigned);
}
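
A worked version of the arithmetic in the final comment: the test registers four requests with weights 3, 1, 1 and 2 (total 7); per the comment, 70% of the 10000-byte JVM budget (7000) is available for scaling, so each request receives weight/7 of 7000, i.e. 3000, 1000, 1000 and 2000, which is exactly what the assertions check.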

From source file:org.cloudgraph.hbase.mapreduce.GraphMapReduceSetup.java

License:Apache License

/**
 * Add the jars containing the given classes to the job's configuration such
 * that JobClient will ship them to the cluster and add them to the
 * DistributedCache.
 */
public static void addDependencyJars(Configuration conf, Class<?>... classes) throws IOException {

    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();
    // Add jars that are already in the tmpjars variable
    jars.addAll(conf.getStringCollection("tmpjars"));

    // add jars as we find them to a map of contents jar name so that we can
    // avoid
    // creating new jars for classes that have already been packaged.
    Map<String, String> packagedClasses = new HashMap<String, String>();

    // Add jars containing the specified classes
    for (Class<?> clazz : classes) {
        if (clazz == null)
            continue;

        Path path = findOrCreateJar(clazz, localFs, packagedClasses);
        if (path == null) {
            LOG.warn("Could not find jar for class " + clazz + " in order to ship it to the cluster.");
            continue;
        }
        if (!localFs.exists(path)) {
            LOG.warn("Could not validate jar file " + path + " for class " + clazz);
            continue;
        }
        jars.add(path.toString());
    }
    if (jars.isEmpty())
        return;

    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[0])));
}