List of usage examples for org.apache.hadoop.conf.Configuration getStringCollection

Prototype:

  public Collection<String> getStringCollection(String name)

Parameter:

  name - the property name.

Return:

  The comma-delimited values of the name property as a collection of Strings.
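Before the source-file examples, a minimal sketch of the call itself. The property names ("demo.servers", "demo.unset") and values here are hypothetical, used only for illustration:

  import java.util.Collection;
  import org.apache.hadoop.conf.Configuration;

  public class GetStringCollectionDemo {
    public static void main(String[] args) {
      Configuration conf = new Configuration();
      // Hypothetical property holding a comma-delimited list, as getStringCollection expects.
      conf.set("demo.servers", "host1:9200,host2:9200,host3:9200");
      // Splits the comma-delimited value into a Collection<String>.
      Collection<String> servers = conf.getStringCollection("demo.servers");
      System.out.println(servers); // [host1:9200, host2:9200, host3:9200]
      // An unset property yields an empty collection, not null.
      System.out.println(conf.getStringCollection("demo.unset").isEmpty()); // true
    }
  }

Most of the examples below use this call to read an existing comma-delimited list (often "tmpjars"), merge new entries into it, and write it back with conf.set.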
From source file: org.apache.kudu.mapreduce.KuduTableMapReduceUtil.java
License: Apache License
/**
 * Add the jars containing the given classes to the job's configuration
 * such that JobClient will ship them to the cluster and add them to
 * the DistributedCache.
 */
public static void addDependencyJars(Configuration conf, Class<?>... classes) throws IOException {
  FileSystem localFs = FileSystem.getLocal(conf);
  Set<String> jars = new HashSet<String>();
  // Add jars that are already in the tmpjars variable
  jars.addAll(conf.getStringCollection("tmpjars"));
  // add jars as we find them to a map of contents jar name so that we can avoid
  // creating new jars for classes that have already been packaged.
  Map<String, String> packagedClasses = new HashMap<String, String>();
  // Add jars containing the specified classes
  for (Class<?> clazz : classes) {
    if (clazz == null) {
      continue;
    }
    Path path = findOrCreateJar(clazz, localFs, packagedClasses);
    if (path == null) {
      LOG.warn("Could not find jar for class " + clazz + " in order to ship it to the cluster.");
      continue;
    }
    if (!localFs.exists(path)) {
      LOG.warn("Could not validate jar file " + path + " for class " + clazz);
      continue;
    }
    jars.add(path.toString());
  }
  if (jars.isEmpty()) {
    return;
  }
  conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[jars.size()])));
}
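The tmpjars handling above follows a read-merge-write round trip that recurs in most of the examples on this page. A stripped-down sketch of just that pattern; the jar paths are hypothetical:

  import java.util.HashSet;
  import java.util.Set;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.util.StringUtils;

  public class TmpJarsRoundTrip {
    public static void main(String[] args) {
      Configuration conf = new Configuration();
      // Pre-existing entry (hypothetical path, for illustration only).
      conf.set("tmpjars", "file:/tmp/a.jar");
      // Read the existing comma-delimited list into a mutable set.
      Set<String> jars = new HashSet<String>(conf.getStringCollection("tmpjars"));
      // Merge in a newly discovered jar, then write the list back.
      jars.add("file:/tmp/b.jar");
      conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[0])));
      System.out.println(conf.get("tmpjars"));
    }
  }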
From source file: org.apache.lens.driver.es.client.jest.JestClientImpl.java
License: Apache License
public JestClientImpl(ESDriverConfig esDriverConfig, Configuration conf) {
  super(esDriverConfig, conf);
  final JestClientFactory factory = new JestClientFactory();
  factory.setHttpClientConfig(
      new HttpClientConfig.Builder(Validate.notNull(conf.getStringCollection(ES_SERVERS)))
          .maxTotalConnection(conf.getInt(MAX_TOTAL_CONN, DEFAULT_MAX_CONN))
          .multiThreaded(conf.getBoolean(IS_MULTITHREADED, DEFAULT_MULTI_THREADED))
          .readTimeout(esDriverConfig.getQueryTimeOutMs())
          .build());
  client = factory.getObject();
}
From source file: org.apache.oozie.action.hadoop.LauncherHelper.java
License: Apache License
public static void setupLauncherInfo(Configuration launcherConf, String jobId, String actionId, Path actionDir,
    String recoveryId, Configuration actionConf, String prepareXML) throws IOException, HadoopAccessorException {

  launcherConf.set(LauncherAMUtils.OOZIE_JOB_ID, jobId);
  launcherConf.set(LauncherAMUtils.OOZIE_ACTION_ID, actionId);
  launcherConf.set(LauncherAMUtils.OOZIE_ACTION_DIR_PATH, actionDir.toString());
  launcherConf.set(LauncherAMUtils.OOZIE_ACTION_RECOVERY_ID, recoveryId);
  launcherConf.set(LauncherAMUtils.ACTION_PREPARE_XML, prepareXML);

  actionConf.set(LauncherAMUtils.OOZIE_JOB_ID, jobId);
  actionConf.set(LauncherAMUtils.OOZIE_ACTION_ID, actionId);

  if (Services.get().getConf().getBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache", false)) {
    List<String> purgedEntries = new ArrayList<>();
    Collection<String> entries = actionConf.getStringCollection("mapreduce.job.cache.files");
    for (String entry : entries) {
      if (entry.contains("#")) {
        purgedEntries.add(entry);
      }
    }
    actionConf.setStrings("mapreduce.job.cache.files",
        purgedEntries.toArray(new String[purgedEntries.size()]));
    launcherConf.setBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache", true);
  }
}
From source file: org.apache.oozie.action.hadoop.LauncherMapperHelper.java
License: Apache License
public static void setupLauncherInfo(JobConf launcherConf, String jobId, String actionId, Path actionDir,
    String recoveryId, Configuration actionConf, String prepareXML) throws IOException, HadoopAccessorException {

  launcherConf.setMapperClass(LauncherMapper.class);
  launcherConf.setSpeculativeExecution(false);
  launcherConf.setNumMapTasks(1);
  launcherConf.setNumReduceTasks(0);

  launcherConf.set(LauncherMapper.OOZIE_JOB_ID, jobId);
  launcherConf.set(LauncherMapper.OOZIE_ACTION_ID, actionId);
  launcherConf.set(LauncherMapper.OOZIE_ACTION_DIR_PATH, actionDir.toString());
  launcherConf.set(LauncherMapper.OOZIE_ACTION_RECOVERY_ID, recoveryId);
  launcherConf.set(LauncherMapper.ACTION_PREPARE_XML, prepareXML);

  actionConf.set(LauncherMapper.OOZIE_JOB_ID, jobId);
  actionConf.set(LauncherMapper.OOZIE_ACTION_ID, actionId);

  if (Services.get().getConf().getBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache", false)) {
    List<String> purgedEntries = new ArrayList<String>();
    Collection<String> entries = actionConf.getStringCollection("mapreduce.job.cache.files");
    for (String entry : entries) {
      if (entry.contains("#")) {
        purgedEntries.add(entry);
      }
    }
    actionConf.setStrings("mapreduce.job.cache.files",
        purgedEntries.toArray(new String[purgedEntries.size()]));
    launcherConf.setBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache", true);
  }

  FileSystem fs = Services.get().get(HadoopAccessorService.class)
      .createFileSystem(launcherConf.get("user.name"), actionDir.toUri(), launcherConf);
  fs.mkdirs(actionDir);

  OutputStream os = fs.create(new Path(actionDir, LauncherMapper.ACTION_CONF_XML));
  try {
    actionConf.writeXml(os);
  } finally {
    IOUtils.closeSafely(os);
  }

  launcherConf.setInputFormat(OozieLauncherInputFormat.class);
  launcherConf.set("mapred.output.dir", new Path(actionDir, "output").toString());
}
From source file: org.apache.pig.backend.hadoop.accumulo.Utils.java
License: Apache License
public static void addDependencyJars(Configuration conf, Class<?>... classes) throws IOException {
  FileSystem localFs = FileSystem.getLocal(conf);
  Set<String> jars = new HashSet<String>();
  // Add jars that are already in the tmpjars variable
  jars.addAll(conf.getStringCollection("tmpjars"));
  // add jars as we find them to a map of contents jar name so that we can
  // avoid creating new jars for classes that have already been packaged.
  Map<String, String> packagedClasses = new HashMap<String, String>();
  // Add jars containing the specified classes
  for (Class<?> clazz : classes) {
    if (clazz == null)
      continue;
    Path path = findOrCreateJar(clazz, localFs, packagedClasses);
    if (path == null) {
      log.warn("Could not find jar for class " + clazz + " in order to ship it to the cluster.");
      continue;
    }
    if (!localFs.exists(path)) {
      log.warn("Could not validate jar file " + path + " for class " + clazz);
      continue;
    }
    jars.add(path.toString());
  }
  if (jars.isEmpty())
    return;
  conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[jars.size()])));
}
From source file: org.apache.pig.impl.util.JarManager.java
License: Apache License
/**
 * Add the jars containing the given classes to the job's configuration
 * such that JobClient will ship them to the cluster and add them to
 * the DistributedCache.
 *
 * @param job
 *          Job object
 * @param classes
 *          classes to find
 * @throws IOException
 */
public static void addDependencyJars(Job job, Class<?>... classes) throws IOException {
  Configuration conf = job.getConfiguration();
  FileSystem fs = FileSystem.getLocal(conf);
  Set<String> jars = new HashSet<String>();
  jars.addAll(conf.getStringCollection("tmpjars"));
  addQualifiedJarsName(fs, jars, classes);
  if (jars.isEmpty())
    return;
  conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[0])));
}
From source file: org.apache.sqoop.accumulo.AccumuloUtil.java
License: Apache License
/**
 * Add the Accumulo jar files to local classpath and dist cache.
 * @throws IOException
 */
public static void addJars(Job job, SqoopOptions options) throws IOException {
  if (isLocalJobTracker(job)) {
    LOG.info("Not adding Accumulo jars to distributed cache in local mode");
  } else if (options.isSkipDistCache()) {
    LOG.info("Not adding Accumulo jars to distributed cache as requested");
  } else {
    Configuration conf = job.getConfiguration();
    String accumuloHome = null;
    String zookeeperHome = null;
    FileSystem fs = FileSystem.getLocal(conf);
    if (options != null) {
      accumuloHome = options.getAccumuloHome();
    }
    if (accumuloHome == null) {
      accumuloHome = SqoopOptions.getAccumuloHomeDefault();
    }
    LOG.info("Accumulo job : Accumulo Home = " + accumuloHome);
    if (options != null) {
      zookeeperHome = options.getZookeeperHome();
    }
    if (zookeeperHome == null) {
      zookeeperHome = SqoopOptions.getZookeeperHomeDefault();
    }
    LOG.info("Accumulo job : Zookeeper Home = " + zookeeperHome);
    conf.addResource(accumuloHome + AccumuloConstants.ACCUMULO_SITE_XML_PATH);

    // Add any libjars already specified
    Set<String> localUrls = new HashSet<String>();
    localUrls.addAll(conf.getStringCollection(ConfigurationConstants.MAPRED_DISTCACHE_CONF_PARAM));

    String dir = accumuloHome + File.separator + "lib";
    LOG.info("Adding jar files under " + dir + " to distributed cache");
    addDirToCache(new File(dir), fs, localUrls, false);

    dir = zookeeperHome;
    LOG.info("Adding jar files under " + dir + " to distributed cache");
    addDirToCache(new File(dir), fs, localUrls, false);

    String tmpjars = conf.get(ConfigurationConstants.MAPRED_DISTCACHE_CONF_PARAM);
    StringBuilder sb = new StringBuilder(1024);
    if (null != tmpjars) {
      sb.append(tmpjars);
      sb.append(",");
    }
    sb.append(StringUtils.arrayToString(localUrls.toArray(new String[0])));
    conf.set(ConfigurationConstants.MAPRED_DISTCACHE_CONF_PARAM, sb.toString());
  }
}
From source file: org.apache.sqoop.mapreduce.hcat.SqoopHCatUtilities.java
License: Apache License
/**
 * Add the Hive and HCatalog jar files to local classpath and dist cache.
 * @throws IOException
 */
public static void addJars(Job job, SqoopOptions options) throws IOException {
  if (isLocalJobTracker(job)) {
    LOG.info("Not adding hcatalog jars to distributed cache in local mode");
    return;
  }
  if (options.isSkipDistCache()) {
    LOG.info("Not adding hcatalog jars to distributed cache as requested");
    return;
  }
  Configuration conf = job.getConfiguration();
  String hiveHome = null;
  String hCatHome = null;
  FileSystem fs = FileSystem.getLocal(conf);
  if (options != null) {
    hiveHome = options.getHiveHome();
  }
  if (hiveHome == null) {
    hiveHome = SqoopOptions.getHiveHomeDefault();
  }
  if (options != null) {
    hCatHome = options.getHCatHome();
  }
  if (hCatHome == null) {
    hCatHome = SqoopOptions.getHCatHomeDefault();
  }
  LOG.info("HCatalog job : Hive Home = " + hiveHome);
  LOG.info("HCatalog job: HCatalog Home = " + hCatHome);
  conf.addResource(hiveHome + HIVESITEXMLPATH);

  // Add these to the 'tmpjars' array, which the MR JobSubmitter
  // will upload to HDFS and put in the DistributedCache libjars.
  List<String> libDirs = new ArrayList<String>();
  libDirs.add(hCatHome + File.separator + HCATSHAREDIR);
  libDirs.add(hCatHome + File.separator + DEFLIBDIR);
  libDirs.add(hiveHome + File.separator + DEFLIBDIR);
  Set<String> localUrls = new HashSet<String>();
  // Add any libjars already specified
  localUrls.addAll(conf.getStringCollection(ConfigurationConstants.MAPRED_DISTCACHE_CONF_PARAM));
  for (String dir : libDirs) {
    LOG.info("Adding jar files under " + dir + " to distributed cache");
    addDirToCache(new File(dir), fs, localUrls, false);
  }

  // Recursively add all hcatalog storage handler jars
  // The HBase storage handler is getting deprecated post Hive+HCat merge
  String hCatStorageHandlerDir = hCatHome + File.separator + "share/hcatalog/storage-handlers";
  LOG.info("Adding jar files under " + hCatStorageHandlerDir + " to distributed cache (recursively)");
  addDirToCache(new File(hCatStorageHandlerDir), fs, localUrls, true);

  String tmpjars = conf.get(ConfigurationConstants.MAPRED_DISTCACHE_CONF_PARAM);
  StringBuilder sb = new StringBuilder(1024);
  if (null != tmpjars) {
    sb.append(tmpjars);
    sb.append(",");
  }
  sb.append(StringUtils.arrayToString(localUrls.toArray(new String[0])));
  conf.set(ConfigurationConstants.MAPRED_DISTCACHE_CONF_PARAM, sb.toString());
}
From source file: org.apache.tez.runtime.common.resources.TestWeightedScalingMemoryDistributor.java
License: Apache License
@Test(timeout = 5000)
public void testSimpleWeightedScaling() {
  Configuration conf = new Configuration(this.conf);
  conf.setStrings(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS,
      WeightedScalingMemoryDistributor.generateWeightStrings(0, 1, 2, 3, 1, 1));
  System.err.println(Joiner.on(",")
      .join(conf.getStringCollection(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS)));

  MemoryDistributor dist = new MemoryDistributor(2, 2, conf);
  dist.setJvmMemory(10000L);

  // First request - ScatterGatherShuffleInput
  MemoryUpdateCallbackForTest e1Callback = new MemoryUpdateCallbackForTest();
  InputContext e1InputContext1 = createTestInputContext();
  InputDescriptor e1InDesc1 = createTestInputDescriptor(OrderedGroupedKVInput.class);
  dist.requestMemory(10000, e1Callback, e1InputContext1, e1InDesc1);

  // Second request - BroadcastInput
  MemoryUpdateCallbackForTest e2Callback = new MemoryUpdateCallbackForTest();
  InputContext e2InputContext2 = createTestInputContext();
  InputDescriptor e2InDesc2 = createTestInputDescriptor(UnorderedKVInput.class);
  dist.requestMemory(10000, e2Callback, e2InputContext2, e2InDesc2);

  // Third request - randomOutput (simulates MROutput)
  MemoryUpdateCallbackForTest e3Callback = new MemoryUpdateCallbackForTest();
  OutputContext e3OutputContext1 = createTestOutputContext();
  OutputDescriptor e3OutDesc1 = createTestOutputDescriptor();
  dist.requestMemory(10000, e3Callback, e3OutputContext1, e3OutDesc1);

  // Fourth request - OnFileSortedOutput
  MemoryUpdateCallbackForTest e4Callback = new MemoryUpdateCallbackForTest();
  OutputContext e4OutputContext2 = createTestOutputContext();
  OutputDescriptor e4OutDesc2 = createTestOutputDescriptor(OrderedPartitionedKVOutput.class);
  dist.requestMemory(10000, e4Callback, e4OutputContext2, e4OutDesc2);

  dist.makeInitialAllocations();

  // Total available: 70% of 10K = 7000
  // 4 requests (weight) - 10K (3), 10K (1), 10K (1), 10K (2)
  // Scale down to - 3000, 1000, 1000, 2000
  assertEquals(3000, e1Callback.assigned);
  assertEquals(1000, e2Callback.assigned);
  assertEquals(1000, e3Callback.assigned);
  assertEquals(2000, e4Callback.assigned);
}
From source file: org.cloudgraph.hbase.mapreduce.GraphMapReduceSetup.java
License: Apache License
/**
 * Add the jars containing the given classes to the job's configuration such
 * that JobClient will ship them to the cluster and add them to the
 * DistributedCache.
 */
public static void addDependencyJars(Configuration conf, Class<?>... classes) throws IOException {
  FileSystem localFs = FileSystem.getLocal(conf);
  Set<String> jars = new HashSet<String>();
  // Add jars that are already in the tmpjars variable
  jars.addAll(conf.getStringCollection("tmpjars"));
  // add jars as we find them to a map of contents jar name so that we can
  // avoid creating new jars for classes that have already been packaged.
  Map<String, String> packagedClasses = new HashMap<String, String>();
  // Add jars containing the specified classes
  for (Class<?> clazz : classes) {
    if (clazz == null)
      continue;
    Path path = findOrCreateJar(clazz, localFs, packagedClasses);
    if (path == null) {
      LOG.warn("Could not find jar for class " + clazz + " in order to ship it to the cluster.");
      continue;
    }
    if (!localFs.exists(path)) {
      LOG.warn("Could not validate jar file " + path + " for class " + clazz);
      continue;
    }
    jars.add(path.toString());
  }
  if (jars.isEmpty())
    return;
  conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[0])));
}