List of usage examples for org.apache.hadoop.conf Configuration getStringCollection

public Collection<String> getStringCollection(String name)

Get the comma delimited values of the name property as a collection of Strings.
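Before the examples, a minimal sketch of how the method behaves; the property name "my.example.hosts" and its value are made up for illustration. getStringCollection splits the comma-delimited value of the named property and returns the pieces as a Collection<String>; if the property is unset, an empty collection is returned.

import java.util.Collection;
import org.apache.hadoop.conf.Configuration;

public class GetStringCollectionSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Hypothetical property holding a comma-delimited list of values.
    conf.set("my.example.hosts", "host1,host2,host3");

    // Prints [host1, host2, host3]; an unset property would yield an empty collection.
    Collection<String> hosts = conf.getStringCollection("my.example.hosts");
    System.out.println(hosts);
  }
}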
From source file:org.apache.blur.mapreduce.lib.CsvBlurDriverTest.java
License:Apache License
@Test
public void testCsvBlurDriverTest2() throws Exception {
  Configuration configurationSetup = new Configuration();
  ControllerPool controllerPool = new CsvBlurDriver.ControllerPool() {
    @Override
    public Iface getClient(String controllerConnectionStr) {
      return getMockIface();
    }
  };
  AtomicReference<Callable<Void>> ref = new AtomicReference<Callable<Void>>();
  Job job = CsvBlurDriver.setupJob(configurationSetup, controllerPool, ref, "-c", "host:40010", "-d",
      "family1", "col1", "col2", "-d", "family2", "col3", "col4", "-t", "table1", "-i", _path1.toString(),
      "-i", _path2.toString(), "-S", "-C", "1000000", "2000000");
  assertNotNull(job);
  Configuration configuration = job.getConfiguration();
  TableDescriptor tableDescriptor = BlurOutputFormat.getTableDescriptor(configuration);
  assertEquals(tableDescriptor.getName(), "table1");
  Collection<String> inputs = configuration.getStringCollection("mapred.input.dir");
  assertEquals(2, inputs.size());
  Map<String, List<String>> familyAndColumnNameMap = CsvBlurMapper.getFamilyAndColumnNameMap(configuration);
  assertEquals(2, familyAndColumnNameMap.size());
}
From source file:org.apache.blur.mapreduce.lib.CsvBlurDriverTest.java
License:Apache License
@Test
public void testCsvBlurDriverTest3() throws Exception {
  Configuration configurationSetup = new Configuration();
  ControllerPool controllerPool = new CsvBlurDriver.ControllerPool() {
    @Override
    public Iface getClient(String controllerConnectionStr) {
      return getMockIface();
    }
  };
  AtomicReference<Callable<Void>> ref = new AtomicReference<Callable<Void>>();
  Job job = CsvBlurDriver.setupJob(configurationSetup, controllerPool, ref, "-c", "host:40010", "-d",
      "family1", "col1", "col2", "-d", "family2", "col3", "col4", "-t", "table1", "-i", _path1.toString(),
      "-i", _path2.toString(), "-S", "-C", "1000000", "2000000", "-p", "SNAPPY");
  assertNotNull(job);
  Configuration configuration = job.getConfiguration();
  TableDescriptor tableDescriptor = BlurOutputFormat.getTableDescriptor(configuration);
  assertEquals(tableDescriptor.getName(), "table1");
  Collection<String> inputs = configuration.getStringCollection("mapred.input.dir");
  assertEquals(2, inputs.size());
  Map<String, List<String>> familyAndColumnNameMap = CsvBlurMapper.getFamilyAndColumnNameMap(configuration);
  assertEquals(2, familyAndColumnNameMap.size());
  assertEquals("true", configuration.get(CsvBlurDriver.MAPRED_COMPRESS_MAP_OUTPUT));
  assertEquals(SnappyCodec.class.getName(),
      configuration.get(CsvBlurDriver.MAPRED_MAP_OUTPUT_COMPRESSION_CODEC));
}
From source file:org.apache.blur.mapreduce.lib.CsvBlurMapper.java
License:Apache License
protected static void append(Configuration configuration, String name, String value) {
  Collection<String> set = configuration.getStringCollection(name);
  if (set == null) {
    set = new TreeSet<String>();
  }
  set.add(value);
  configuration.setStrings(name, set.toArray(new String[set.size()]));
}
From source file:org.apache.blur.mapreduce.lib.CsvBlurMapper.java
License:Apache License
/**
 * Adds the column layout for the given family.
 *
 * @param configuration
 *          the configuration to apply the layout.
 * @param family
 *          the family name.
 * @param columns
 *          the column names.
 */
public static void addColumns(Configuration configuration, String family, String... columns) {
  Collection<String> families = new TreeSet<String>(configuration.getStringCollection(BLUR_CSV_FAMILIES));
  families.add(family);
  configuration.setStrings(BLUR_CSV_FAMILIES, families.toArray(new String[] {}));
  configuration.setStrings(BLUR_CSV_FAMILY_COLUMN_PREFIX + family, columns);
}
From source file:org.apache.blur.mapreduce.lib.CsvBlurMapper.java
License:Apache License
public static Collection<String> getFamilyNames(Configuration configuration) {
  return configuration.getStringCollection(BLUR_CSV_FAMILIES);
}
From source file:org.apache.blur.mapreduce.lib.CsvBlurMapper.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  Configuration configuration = context.getConfiguration();
  _autoGenerateRecordIdAsHashOfData = isAutoGenerateRecordIdAsHashOfData(configuration);
  _autoGenerateRowIdAsHashOfData = isAutoGenerateRowIdAsHashOfData(configuration);
  if (_autoGenerateRecordIdAsHashOfData || _autoGenerateRowIdAsHashOfData) {
    try {
      _digest = MessageDigest.getInstance("MD5");
    } catch (NoSuchAlgorithmException e) {
      throw new IOException(e);
    }
  }
  _columnNameMap = getFamilyAndColumnNameMap(configuration);
  _separator = new String(Base64.decodeBase64(configuration.get(BLUR_CSV_SEPARATOR_BASE64, _separator)), UTF_8);
  _splitter = Splitter.on(_separator);
  Path fileCurrentlyProcessing = getCurrentFile(context);
  Collection<String> families = configuration.getStringCollection(BLUR_CSV_FAMILY_PATH_MAPPINGS_FAMILIES);
  OUTER: for (String family : families) {
    Collection<String> pathStrCollection = configuration
        .getStringCollection(BLUR_CSV_FAMILY_PATH_MAPPINGS_FAMILY_PREFIX + family);
    for (String pathStr : pathStrCollection) {
      Path path = new Path(pathStr);
      FileSystem fileSystem = path.getFileSystem(configuration);
      path = path.makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
      if (isParent(path, fileCurrentlyProcessing)) {
        _familyFromPath = family;
        _familyNotInFile = true;
        break OUTER;
      }
    }
  }
}
From source file:org.apache.cassandra.hadoop.AbstractBulkOutputFormat.java
License:Apache License
/**
 * Get the hosts to ignore as a collection of strings
 * @param conf job configuration
 * @return the nodes to ignore as a collection of strings
 */
public static Collection<String> getIgnoreHosts(Configuration conf) {
  return conf.getStringCollection(AbstractBulkRecordWriter.IGNORE_HOSTS);
}
From source file:org.apache.crunch.io.avro.trevni.TrevniKeyTarget.java
License:Apache License
@Override
public void configureForMapReduce(Job job, PType<?> ptype, Path outputPath, String name) {
  AvroType<?> atype = (AvroType<?>) ptype;
  Configuration conf = job.getConfiguration();
  if (null == name) {
    AvroJob.setOutputKeySchema(job, atype.getSchema());
    AvroJob.setMapOutputKeySchema(job, atype.getSchema());
    Avros.configureReflectDataFactory(conf);
    configureForMapReduce(job, AvroKey.class, NullWritable.class,
        FormatBundle.forOutput(TrevniOutputFormat.class), outputPath, null);
  } else {
    FormatBundle<TrevniOutputFormat> bundle = FormatBundle.forOutput(TrevniOutputFormat.class);
    bundle.set("avro.schema.output.key", atype.getSchema().toString());
    bundle.set("mapred.output.value.groupfn.class", AvroKeyComparator.class.getName());
    bundle.set("mapred.output.key.comparator.class", AvroKeyComparator.class.getName());
    bundle.set("avro.serialization.key.writer.schema", atype.getSchema().toString());
    bundle.set("avro.serialization.key.reader.schema", atype.getSchema().toString());

    // Equivalent to...
    // AvroSerialization.addToConfiguration(job.getConfiguration());
    Collection<String> serializations = conf.getStringCollection("io.serializations");
    if (!serializations.contains(AvroSerialization.class.getName())) {
      serializations.add(AvroSerialization.class.getName());
      bundle.set(name, StringUtils.arrayToString(serializations.toArray(new String[serializations.size()])));
    }

    // The following is equivalent to Avros.configureReflectDataFactory(conf);
    bundle.set(REFLECT_DATA_FACTORY_CLASS, REFLECT_DATA_FACTORY.getClass().getName());

    // Set output which honors the name.
    bundle.set("mapred.output.dir", new Path(outputPath, name).toString());

    // Set value which will be ignored but should get past the FileOutputFormat.checkOutputSpecs(..)
    // which requires the "mapred.output.dir" value to be set.
    try {
      FileOutputFormat.setOutputPath(job, outputPath);
    } catch (Exception ioe) {
      throw new RuntimeException(ioe);
    }
    CrunchOutputs.addNamedOutput(job, name, bundle, AvroKey.class, NullWritable.class);
  }
}
From source file:org.apache.giraph.hive.common.HiveUtils.java
License:Apache License
/**
 * Add strings to a collection stored in the Configuration
 *
 * @param conf Configuration
 * @param key key of the collection to add to
 * @param values values to add to the collection
 */
public static void addToStringCollection(Configuration conf, String key, Collection<String> values) {
  Collection<String> strings = conf.getStringCollection(key);
  strings.addAll(values);
  conf.setStrings(key, strings.toArray(new String[strings.size()]));
}
From source file:org.apache.hive.hcatalog.templeton.SecureProxySupport.java
License:Apache License
private Token<?>[] getFSDelegationToken(String user, final Configuration conf)
    throws IOException, InterruptedException {
  LOG.info("user: " + user + " loginUser: " + UserGroupInformation.getLoginUser().getUserName());
  final UserGroupInformation ugi = UgiFactory.getUgi(user);
  final TokenWrapper twrapper = new TokenWrapper();
  ugi.doAs(new PrivilegedExceptionAction<Object>() {
    public Object run() throws IOException, URISyntaxException {
      Credentials creds = new Credentials();
      // get Tokens for default FS. Not all FSs support delegation tokens, e.g. WASB
      collectTokens(FileSystem.get(conf), twrapper, creds, ugi.getShortUserName());
      // get tokens for all other known FSs since Hive tables may result in different ones
      // passing "creds" prevents duplicate tokens from being added
      Collection<String> URIs = conf.getStringCollection("mapreduce.job.hdfs-servers");
      for (String uri : URIs) {
        LOG.debug("Getting tokens for " + uri);
        collectTokens(FileSystem.get(new URI(uri), conf), twrapper, creds, ugi.getShortUserName());
      }
      return null;
    }
  });
  return twrapper.tokens;
}