Example usage for org.apache.hadoop.conf Configuration getStringCollection


Introduction

On this page you can find example usages of org.apache.hadoop.conf.Configuration.getStringCollection.

Prototype

public Collection<String> getStringCollection(String name) 

Document

Get the comma-delimited values of the name property as a collection of Strings.
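
As a quick illustration of that behavior, here is a minimal, self-contained sketch (not taken from the examples below; the property names example.hosts and example.missing are invented for illustration):

import java.util.Collection;

import org.apache.hadoop.conf.Configuration;

public class GetStringCollectionSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Store a comma-delimited value under an illustrative property name.
        conf.set("example.hosts", "host1,host2,host3");

        // getStringCollection splits the value on commas into a Collection<String>.
        Collection<String> hosts = conf.getStringCollection("example.hosts");
        System.out.println(hosts); // [host1, host2, host3]

        // A property that was never set yields an empty collection rather than null,
        // so callers can iterate over the result without a null check.
        System.out.println(conf.getStringCollection("example.missing").isEmpty()); // true
    }
}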

Usage

From source file:org.apache.blur.mapreduce.lib.CsvBlurDriverTest.java

License:Apache License

@Test
public void testCsvBlurDriverTest2() throws Exception {
    Configuration configurationSetup = new Configuration();
    ControllerPool controllerPool = new CsvBlurDriver.ControllerPool() {
        @Override
        public Iface getClient(String controllerConnectionStr) {
            return getMockIface();
        }
    };
    AtomicReference<Callable<Void>> ref = new AtomicReference<Callable<Void>>();
    Job job = CsvBlurDriver.setupJob(configurationSetup, controllerPool, ref, "-c", "host:40010", "-d",
            "family1", "col1", "col2", "-d", "family2", "col3", "col4", "-t", "table1", "-i", _path1.toString(),
            "-i", _path2.toString(), "-S", "-C", "1000000", "2000000");
    assertNotNull(job);
    Configuration configuration = job.getConfiguration();
    TableDescriptor tableDescriptor = BlurOutputFormat.getTableDescriptor(configuration);
    assertEquals(tableDescriptor.getName(), "table1");
    Collection<String> inputs = configuration.getStringCollection("mapred.input.dir");
    assertEquals(2, inputs.size());
    Map<String, List<String>> familyAndColumnNameMap = CsvBlurMapper.getFamilyAndColumnNameMap(configuration);
    assertEquals(2, familyAndColumnNameMap.size());
}

From source file:org.apache.blur.mapreduce.lib.CsvBlurDriverTest.java

License:Apache License

@Test
public void testCsvBlurDriverTest3() throws Exception {
    Configuration configurationSetup = new Configuration();
    ControllerPool controllerPool = new CsvBlurDriver.ControllerPool() {
        @Override
        public Iface getClient(String controllerConnectionStr) {
            return getMockIface();
        }
    };
    AtomicReference<Callable<Void>> ref = new AtomicReference<Callable<Void>>();
    Job job = CsvBlurDriver.setupJob(configurationSetup, controllerPool, ref, "-c", "host:40010", "-d",
            "family1", "col1", "col2", "-d", "family2", "col3", "col4", "-t", "table1", "-i", _path1.toString(),
            "-i", _path2.toString(), "-S", "-C", "1000000", "2000000", "-p", "SNAPPY");
    assertNotNull(job);
    Configuration configuration = job.getConfiguration();
    TableDescriptor tableDescriptor = BlurOutputFormat.getTableDescriptor(configuration);
    assertEquals(tableDescriptor.getName(), "table1");
    Collection<String> inputs = configuration.getStringCollection("mapred.input.dir");
    assertEquals(2, inputs.size());
    Map<String, List<String>> familyAndColumnNameMap = CsvBlurMapper.getFamilyAndColumnNameMap(configuration);
    assertEquals(2, familyAndColumnNameMap.size());
    assertEquals("true", configuration.get(CsvBlurDriver.MAPRED_COMPRESS_MAP_OUTPUT));
    assertEquals(SnappyCodec.class.getName(),
            configuration.get(CsvBlurDriver.MAPRED_MAP_OUTPUT_COMPRESSION_CODEC));
}

From source file:org.apache.blur.mapreduce.lib.CsvBlurMapper.java

License:Apache License

protected static void append(Configuration configuration, String name, String value) {
    Collection<String> set = configuration.getStringCollection(name);
    if (set == null) {
        set = new TreeSet<String>();
    }
    set.add(value);
    configuration.setStrings(name, set.toArray(new String[set.size()]));
}
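
The append helper above shows the common read-modify-write pattern for list-valued properties: read the current values with getStringCollection, add the new entry, and write the result back with setStrings. A standalone sketch of the same pattern, using the invented property name example.families, might look like this:

import java.util.Collection;
import java.util.TreeSet;

import org.apache.hadoop.conf.Configuration;

public class AppendToCollectionSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setStrings("example.families", "family1");

        // Copy the existing values into a sorted, de-duplicated set before adding to it.
        Collection<String> values = new TreeSet<String>(conf.getStringCollection("example.families"));
        values.add("family2");

        // setStrings stores the values back as a single comma-delimited property.
        conf.setStrings("example.families", values.toArray(new String[values.size()]));
        System.out.println(conf.get("example.families")); // family1,family2
    }
}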

From source file:org.apache.blur.mapreduce.lib.CsvBlurMapper.java

License:Apache License

/**
 * Adds the column layout for the given family.
 *
 * @param configuration
 *          the configuration to apply the layout.
 * @param family
 *          the family name.
 * @param columns
 *          the column names.
 */
public static void addColumns(Configuration configuration, String family, String... columns) {
    Collection<String> families = new TreeSet<String>(configuration.getStringCollection(BLUR_CSV_FAMILIES));
    families.add(family);
    configuration.setStrings(BLUR_CSV_FAMILIES, families.toArray(new String[] {}));
    configuration.setStrings(BLUR_CSV_FAMILY_COLUMN_PREFIX + family, columns);
}

From source file:org.apache.blur.mapreduce.lib.CsvBlurMapper.java

License:Apache License

public static Collection<String> getFamilyNames(Configuration configuration) {
    return configuration.getStringCollection(BLUR_CSV_FAMILIES);
}

From source file:org.apache.blur.mapreduce.lib.CsvBlurMapper.java

License:Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration configuration = context.getConfiguration();
    _autoGenerateRecordIdAsHashOfData = isAutoGenerateRecordIdAsHashOfData(configuration);
    _autoGenerateRowIdAsHashOfData = isAutoGenerateRowIdAsHashOfData(configuration);
    if (_autoGenerateRecordIdAsHashOfData || _autoGenerateRowIdAsHashOfData) {
        try {
            _digest = MessageDigest.getInstance("MD5");
        } catch (NoSuchAlgorithmException e) {
            throw new IOException(e);
        }
    }
    _columnNameMap = getFamilyAndColumnNameMap(configuration);
    _separator = new String(Base64.decodeBase64(configuration.get(BLUR_CSV_SEPARATOR_BASE64, _separator)),
            UTF_8);
    _splitter = Splitter.on(_separator);
    Path fileCurrentlyProcessing = getCurrentFile(context);
    Collection<String> families = configuration.getStringCollection(BLUR_CSV_FAMILY_PATH_MAPPINGS_FAMILIES);
    OUTER: for (String family : families) {
        Collection<String> pathStrCollection = configuration
                .getStringCollection(BLUR_CSV_FAMILY_PATH_MAPPINGS_FAMILY_PREFIX + family);
        for (String pathStr : pathStrCollection) {
            Path path = new Path(pathStr);
            FileSystem fileSystem = path.getFileSystem(configuration);
            path = path.makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
            if (isParent(path, fileCurrentlyProcessing)) {
                _familyFromPath = family;
                _familyNotInFile = true;
                break OUTER;
            }
        }
    }
}

From source file:org.apache.cassandra.hadoop.AbstractBulkOutputFormat.java

License:Apache License

/**
 * Get the hosts to ignore as a collection of strings
 * @param conf job configuration
 * @return the nodes to ignore as a collection of strings
 */
public static Collection<String> getIgnoreHosts(Configuration conf) {
    return conf.getStringCollection(AbstractBulkRecordWriter.IGNORE_HOSTS);
}

From source file:org.apache.crunch.io.avro.trevni.TrevniKeyTarget.java

License:Apache License

@Override
public void configureForMapReduce(Job job, PType<?> ptype, Path outputPath, String name) {
    AvroType<?> atype = (AvroType<?>) ptype;
    Configuration conf = job.getConfiguration();

    if (null == name) {
        AvroJob.setOutputKeySchema(job, atype.getSchema());
        AvroJob.setMapOutputKeySchema(job, atype.getSchema());

        Avros.configureReflectDataFactory(conf);
        configureForMapReduce(job, AvroKey.class, NullWritable.class,
                FormatBundle.forOutput(TrevniOutputFormat.class), outputPath, null);
    } else {
        FormatBundle<TrevniOutputFormat> bundle = FormatBundle.forOutput(TrevniOutputFormat.class);

        bundle.set("avro.schema.output.key", atype.getSchema().toString());
        bundle.set("mapred.output.value.groupfn.class", AvroKeyComparator.class.getName());
        bundle.set("mapred.output.key.comparator.class", AvroKeyComparator.class.getName());
        bundle.set("avro.serialization.key.writer.schema", atype.getSchema().toString());
        bundle.set("avro.serialization.key.reader.schema", atype.getSchema().toString());

        //Equivalent to...
        // AvroSerialization.addToConfiguration(job.getConfiguration());
        Collection<String> serializations = conf.getStringCollection("io.serializations");
        if (!serializations.contains(AvroSerialization.class.getName())) {
            serializations.add(AvroSerialization.class.getName());
            bundle.set(name,
                    StringUtils.arrayToString(serializations.toArray(new String[serializations.size()])));
        }

        //The following is equivalent to Avros.configureReflectDataFactory(conf);
        bundle.set(REFLECT_DATA_FACTORY_CLASS, REFLECT_DATA_FACTORY.getClass().getName());

        //Set output which honors the name.
        bundle.set("mapred.output.dir", new Path(outputPath, name).toString());

        //Set value which will be ignored but should get past the FileOutputFormat.checkOutputSpecs(..)
        //which requires the "mapred.output.dir" value to be set.
        try {
            FileOutputFormat.setOutputPath(job, outputPath);
        } catch (Exception ioe) {
            throw new RuntimeException(ioe);
        }

        CrunchOutputs.addNamedOutput(job, name, bundle, AvroKey.class, NullWritable.class);
    }
}
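
For context, the io.serializations handling in the block above mirrors what the source's own comment describes as AvroSerialization.addToConfiguration: read the registered serializations, append a class name if it is missing, and store the joined list back. A rough standalone sketch of that pattern follows; com.example.MySerialization is a made-up class name used purely for illustration:

import java.util.Collection;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.StringUtils;

public class AddSerializationSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        String serialization = "com.example.MySerialization"; // illustrative class name

        // io.serializations is itself a comma-delimited list of class names.
        Collection<String> serializations = conf.getStringCollection("io.serializations");
        if (!serializations.contains(serialization)) {
            serializations.add(serialization);
            // Join the collection back into a single comma-delimited value.
            conf.set("io.serializations",
                    StringUtils.arrayToString(serializations.toArray(new String[serializations.size()])));
        }
        System.out.println(conf.get("io.serializations"));
    }
}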

From source file:org.apache.giraph.hive.common.HiveUtils.java

License:Apache License

/**
 * Add strings to a collection property in the Configuration
 *
 * @param conf Configuration
 * @param key configuration key to append to
 * @param values values to add to the collection
 */
public static void addToStringCollection(Configuration conf, String key, Collection<String> values) {
    Collection<String> strings = conf.getStringCollection(key);
    strings.addAll(values);
    conf.setStrings(key, strings.toArray(new String[strings.size()]));
}

From source file:org.apache.hive.hcatalog.templeton.SecureProxySupport.java

License:Apache License

private Token<?>[] getFSDelegationToken(String user, final Configuration conf)
        throws IOException, InterruptedException {
    LOG.info("user: " + user + " loginUser: " + UserGroupInformation.getLoginUser().getUserName());
    final UserGroupInformation ugi = UgiFactory.getUgi(user);

    final TokenWrapper twrapper = new TokenWrapper();
    ugi.doAs(new PrivilegedExceptionAction<Object>() {
        public Object run() throws IOException, URISyntaxException {
            Credentials creds = new Credentials();
            //get Tokens for default FS.  Not all FSs support delegation tokens, e.g. WASB
            collectTokens(FileSystem.get(conf), twrapper, creds, ugi.getShortUserName());
            //get tokens for all other known FSs since Hive tables may result in different ones
            //passing "creds" prevents duplicate tokens from being added
            Collection<String> URIs = conf.getStringCollection("mapreduce.job.hdfs-servers");
            for (String uri : URIs) {
                LOG.debug("Getting tokens for " + uri);
                collectTokens(FileSystem.get(new URI(uri), conf), twrapper, creds, ugi.getShortUserName());
            }
            return null;
        }
    });
    return twrapper.tokens;
}