Example usage for org.apache.hadoop.conf Configuration getStringCollection


Introduction

On this page you can find example usages of org.apache.hadoop.conf.Configuration.getStringCollection.

Prototype

public Collection<String> getStringCollection(String name) 

Document

Get the comma-delimited values of the name property as a collection of Strings.
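
As a quick illustration of that behavior, here is a minimal, self-contained sketch (not taken from the examples below; the property names example.hosts and example.missing are invented for illustration):

import java.util.Collection;

import org.apache.hadoop.conf.Configuration;

public class GetStringCollectionSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Store a comma-delimited value under an illustrative property name.
        conf.set("example.hosts", "host1,host2,host3");

        // getStringCollection splits the value on commas into a Collection<String>.
        Collection<String> hosts = conf.getStringCollection("example.hosts");
        System.out.println(hosts); // [host1, host2, host3]

        // A property that was never set yields an empty collection rather than null,
        // so callers can iterate over the result without a null check.
        System.out.println(conf.getStringCollection("example.missing").isEmpty()); // true
    }
}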

Usage

From source file:org.apache.blur.mapreduce.lib.CsvBlurDriverTest.java

License:Apache License

@Test
public void testCsvBlurDriverTest2() throws Exception {
    Configuration configurationSetup = new Configuration();
    ControllerPool controllerPool = new CsvBlurDriver.ControllerPool() {
        @Override
        public Iface getClient(String controllerConnectionStr) {
            return getMockIface();
        }
    };
    AtomicReference<Callable<Void>> ref = new AtomicReference<Callable<Void>>();
    Job job = CsvBlurDriver.setupJob(configurationSetup, controllerPool, ref, "-c", "host:40010", "-d",
            "family1", "col1", "col2", "-d", "family2", "col3", "col4", "-t", "table1", "-i", _path1.toString(),
            "-i", _path2.toString(), "-S", "-C", "1000000", "2000000");
    assertNotNull(job);
    Configuration configuration = job.getConfiguration();
    TableDescriptor tableDescriptor = BlurOutputFormat.getTableDescriptor(configuration);
    assertEquals(tableDescriptor.getName(), "table1");
    Collection<String> inputs = configuration.getStringCollection("mapred.input.dir");
    assertEquals(2, inputs.size());
    Map<String, List<String>> familyAndColumnNameMap = CsvBlurMapper.getFamilyAndColumnNameMap(configuration);
    assertEquals(2, familyAndColumnNameMap.size());
}

From source file:org.apache.blur.mapreduce.lib.CsvBlurDriverTest.java

License:Apache License

@Test
public void testCsvBlurDriverTest3() throws Exception {
    Configuration configurationSetup = new Configuration();
    ControllerPool controllerPool = new CsvBlurDriver.ControllerPool() {
        @Override
        public Iface getClient(String controllerConnectionStr) {
            return getMockIface();
        }
    };
    AtomicReference<Callable<Void>> ref = new AtomicReference<Callable<Void>>();
    Job job = CsvBlurDriver.setupJob(configurationSetup, controllerPool, ref, "-c", "host:40010", "-d",
            "family1", "col1", "col2", "-d", "family2", "col3", "col4", "-t", "table1", "-i", _path1.toString(),
            "-i", _path2.toString(), "-S", "-C", "1000000", "2000000", "-p", "SNAPPY");
    assertNotNull(job);
    Configuration configuration = job.getConfiguration();
    TableDescriptor tableDescriptor = BlurOutputFormat.getTableDescriptor(configuration);
    assertEquals(tableDescriptor.getName(), "table1");
    Collection<String> inputs = configuration.getStringCollection("mapred.input.dir");
    assertEquals(2, inputs.size());
    Map<String, List<String>> familyAndColumnNameMap = CsvBlurMapper.getFamilyAndColumnNameMap(configuration);
    assertEquals(2, familyAndColumnNameMap.size());
    assertEquals("true", configuration.get(CsvBlurDriver.MAPRED_COMPRESS_MAP_OUTPUT));
    assertEquals(SnappyCodec.class.getName(),
            configuration.get(CsvBlurDriver.MAPRED_MAP_OUTPUT_COMPRESSION_CODEC));
}

From source file:org.apache.blur.mapreduce.lib.CsvBlurMapper.java

License:Apache License

protected static void append(Configuration configuration, String name, String value) {
    Collection<String> set = configuration.getStringCollection(name);
    if (set == null) {
        set = new TreeSet<String>();
    }
    set.add(value);
    configuration.setStrings(name, set.toArray(new String[set.size()]));
}
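
The append helper above shows the common read-modify-write pattern for list-valued properties: read the current values with getStringCollection, add the new entry, and write the result back with setStrings. A standalone sketch of the same pattern, using the invented property name example.families, might look like this:

import java.util.Collection;
import java.util.TreeSet;

import org.apache.hadoop.conf.Configuration;

public class AppendToCollectionSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setStrings("example.families", "family1");

        // Copy the existing values into a sorted, de-duplicated set before adding to it.
        Collection<String> values = new TreeSet<String>(conf.getStringCollection("example.families"));
        values.add("family2");

        // setStrings stores the values back as a single comma-delimited property.
        conf.setStrings("example.families", values.toArray(new String[values.size()]));
        System.out.println(conf.get("example.families")); // family1,family2
    }
}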

From source file:org.apache.blur.mapreduce.lib.CsvBlurMapper.java

License:Apache License

/**
 * Adds the column layout for the given family.
 *
 * @param configuration
 *          the configuration to apply the layout.
 * @param family
 *          the family name.
 * @param columns
 *          the column names.
 */
public static void addColumns(Configuration configuration, String family, String... columns) {
    Collection<String> families = new TreeSet<String>(configuration.getStringCollection(BLUR_CSV_FAMILIES));
    families.add(family);
    configuration.setStrings(BLUR_CSV_FAMILIES, families.toArray(new String[] {}));
    configuration.setStrings(BLUR_CSV_FAMILY_COLUMN_PREFIX + family, columns);
}

From source file:org.apache.blur.mapreduce.lib.CsvBlurMapper.java

License:Apache License

public static Collection<String> getFamilyNames(Configuration configuration) {
    return configuration.getStringCollection(BLUR_CSV_FAMILIES);
}

From source file:org.apache.blur.mapreduce.lib.CsvBlurMapper.java

License:Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration configuration = context.getConfiguration();
    _autoGenerateRecordIdAsHashOfData = isAutoGenerateRecordIdAsHashOfData(configuration);
    _autoGenerateRowIdAsHashOfData = isAutoGenerateRowIdAsHashOfData(configuration);
    if (_autoGenerateRecordIdAsHashOfData || _autoGenerateRowIdAsHashOfData) {
        try {
            _digest = MessageDigest.getInstance("MD5");
        } catch (NoSuchAlgorithmException e) {
            throw new IOException(e);
        }
    }
    _columnNameMap = getFamilyAndColumnNameMap(configuration);
    _separator = new String(Base64.decodeBase64(configuration.get(BLUR_CSV_SEPARATOR_BASE64, _separator)),
            UTF_8);
    _splitter = Splitter.on(_separator);
    Path fileCurrentlyProcessing = getCurrentFile(context);
    Collection<String> families = configuration.getStringCollection(BLUR_CSV_FAMILY_PATH_MAPPINGS_FAMILIES);
    OUTER: for (String family : families) {
        Collection<String> pathStrCollection = configuration
                .getStringCollection(BLUR_CSV_FAMILY_PATH_MAPPINGS_FAMILY_PREFIX + family);
        for (String pathStr : pathStrCollection) {
            Path path = new Path(pathStr);
            FileSystem fileSystem = path.getFileSystem(configuration);
            path = path.makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
            if (isParent(path, fileCurrentlyProcessing)) {
                _familyFromPath = family;
                _familyNotInFile = true;
                break OUTER;
            }
        }
    }
}

From source file:org.apache.cassandra.hadoop.AbstractBulkOutputFormat.java

License:Apache License

/**
 * Get the hosts to ignore as a collection of strings
 * @param conf job configuration
 * @return the nodes to ignore as a collection of strings
 */
public static Collection<String> getIgnoreHosts(Configuration conf) {
    return conf.getStringCollection(AbstractBulkRecordWriter.IGNORE_HOSTS);
}

From source file:org.apache.crunch.io.avro.trevni.TrevniKeyTarget.java

License:Apache License

@Override
public void configureForMapReduce(Job job, PType<?> ptype, Path outputPath, String name) {
    AvroType<?> atype = (AvroType<?>) ptype;
    Configuration conf = job.getConfiguration();

    if (null == name) {
        AvroJob.setOutputKeySchema(job, atype.getSchema());
        AvroJob.setMapOutputKeySchema(job, atype.getSchema());

        Avros.configureReflectDataFactory(conf);
        configureForMapReduce(job, AvroKey.class, NullWritable.class,
                FormatBundle.forOutput(TrevniOutputFormat.class), outputPath, null);
    } else {
        FormatBundle<TrevniOutputFormat> bundle = FormatBundle.forOutput(TrevniOutputFormat.class);

        bundle.set("avro.schema.output.key", atype.getSchema().toString());
        bundle.set("mapred.output.value.groupfn.class", AvroKeyComparator.class.getName());
        bundle.set("mapred.output.key.comparator.class", AvroKeyComparator.class.getName());
        bundle.set("avro.serialization.key.writer.schema", atype.getSchema().toString());
        bundle.set("avro.serialization.key.reader.schema", atype.getSchema().toString());

        //Equivalent to...
        // AvroSerialization.addToConfiguration(job.getConfiguration());
        Collection<String> serializations = conf.getStringCollection("io.serializations");
        if (!serializations.contains(AvroSerialization.class.getName())) {
            serializations.add(AvroSerialization.class.getName());
            bundle.set(name,
                    StringUtils.arrayToString(serializations.toArray(new String[serializations.size()])));
        }

        //The following is equivalent to Avros.configureReflectDataFactory(conf);
        bundle.set(REFLECT_DATA_FACTORY_CLASS, REFLECT_DATA_FACTORY.getClass().getName());

        //Set output which honors the name.
        bundle.set("mapred.output.dir", new Path(outputPath, name).toString());

        //Set value which will be ignored but should get past the FileOutputFormat.checkOutputSpecs(..)
        //which requires the "mapred.output.dir" value to be set.
        try {
            FileOutputFormat.setOutputPath(job, outputPath);
        } catch (Exception ioe) {
            throw new RuntimeException(ioe);
        }

        CrunchOutputs.addNamedOutput(job, name, bundle, AvroKey.class, NullWritable.class);
    }
}
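
For context, the io.serializations handling in the block above mirrors what the source's own comment describes as AvroSerialization.addToConfiguration: read the registered serializations, append a class name if it is missing, and store the joined list back. A rough standalone sketch of that pattern follows; com.example.MySerialization is a made-up class name used purely for illustration:

import java.util.Collection;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.StringUtils;

public class AddSerializationSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        String serialization = "com.example.MySerialization"; // illustrative class name

        // io.serializations is itself a comma-delimited list of class names.
        Collection<String> serializations = conf.getStringCollection("io.serializations");
        if (!serializations.contains(serialization)) {
            serializations.add(serialization);
            // Join the collection back into a single comma-delimited value.
            conf.set("io.serializations",
                    StringUtils.arrayToString(serializations.toArray(new String[serializations.size()])));
        }
        System.out.println(conf.get("io.serializations"));
    }
}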

From source file:org.apache.giraph.hive.common.HiveUtils.java

License:Apache License

/**
 * Add strings to a collection property in the Configuration
 *
 * @param conf Configuration
 * @param key configuration key to append to
 * @param values values to add to the collection
 */
public static void addToStringCollection(Configuration conf, String key, Collection<String> values) {
    Collection<String> strings = conf.getStringCollection(key);
    strings.addAll(values);
    conf.setStrings(key, strings.toArray(new String[strings.size()]));
}

From source file:org.apache.hive.hcatalog.templeton.SecureProxySupport.java

License:Apache License

private Token<?>[] getFSDelegationToken(String user, final Configuration conf)
        throws IOException, InterruptedException {
    LOG.info("user: " + user + " loginUser: " + UserGroupInformation.getLoginUser().getUserName());
    final UserGroupInformation ugi = UgiFactory.getUgi(user);

    final TokenWrapper twrapper = new TokenWrapper();
    ugi.doAs(new PrivilegedExceptionAction<Object>() {
        public Object run() throws IOException, URISyntaxException {
            Credentials creds = new Credentials();
            //get Tokens for default FS.  Not all FSs support delegation tokens, e.g. WASB
            collectTokens(FileSystem.get(conf), twrapper, creds, ugi.getShortUserName());
            //get tokens for all other known FSs since Hive tables may result in different ones
            //passing "creds" prevents duplicate tokens from being added
            Collection<String> URIs = conf.getStringCollection("mapreduce.job.hdfs-servers");
            for (String uri : URIs) {
                LOG.debug("Getting tokens for " + uri);
                collectTokens(FileSystem.get(new URI(uri), conf), twrapper, creds, ugi.getShortUserName());
            }
            return null;
        }
    });
    return twrapper.tokens;
}