Example usage for org.apache.hadoop.conf Configuration setStrings

List of usage examples for org.apache.hadoop.conf Configuration setStrings

Introduction

On this page you can find example usage for org.apache.hadoop.conf Configuration setStrings.

Prototype

public void setStrings(String name, String... values) 

Document

Set the array of string values for the name property as comma delimited values.
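
A minimal sketch of the call and of how the stored value can be read back. The property name "example.hosts" is made up here purely for illustration:

import org.apache.hadoop.conf.Configuration;

public class SetStringsExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Store several values under one property name.
        conf.setStrings("example.hosts", "node1", "node2", "node3");

        // The values are stored as a single comma-delimited string ...
        System.out.println(conf.get("example.hosts"));      // node1,node2,node3

        // ... and can be read back as an array with getStrings().
        String[] hosts = conf.getStrings("example.hosts");
        System.out.println(hosts.length);                    // 3
    }
}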

Usage

From source file: com.datasalt.pangool.tuplemr.TupleMRConfig.java

License: Apache License

/**
 * Serializes the custom comparators. It uses the distributed cache
 * serialization.<br>
 * Two config properties are used: the first stores the references. For
 * example "common|address" refers to the sort comparator for address in the
 * common order. "1|postalCode" refers to the sort comparator for the specific
 * sort on field1 for the schema with schemaId = 1. The other config property
 * stores the instance file paths where the instances are stored in the
 * distributed cache.
 * <p>
 * Returns the instance files created.
 */
static Set<String> serializeComparators(TupleMRConfig tupleMRConfig, Configuration conf)
        throws TupleMRException {

    Set<String> instanceFiles = new HashSet<String>();

    List<String> comparatorRefs = new ArrayList<String>();
    List<String> comparatorInstanceFiles = new ArrayList<String>();

    // We use "common" as the prefix for the common criteria
    instanceFiles.addAll(serializeComparators(tupleMRConfig.getCommonCriteria(), conf, comparatorRefs,
            comparatorInstanceFiles, COMMON));

    List<Criteria> specificCriterias = tupleMRConfig.getSpecificOrderBys();
    // We use the schemaId as prefix for the specific sorting.
    for (int i = 0; i < specificCriterias.size(); i++) {
        instanceFiles.addAll(serializeComparators(specificCriterias.get(i), conf, comparatorRefs,
                comparatorInstanceFiles, i + ""));
    }

    if (comparatorRefs.size() > 0) {
        conf.setStrings(CONF_COMPARATOR_REFERENCES, comparatorRefs.toArray(new String[] {}));
        conf.setStrings(CONF_COMPARATOR_INSTANCES, comparatorInstanceFiles.toArray(new String[] {}));
    }

    return instanceFiles;
}
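
The two parallel lists written with setStrings() above can be recovered with getStrings() and re-paired by index. A hedged sketch of what the read side could look like (this is not Pangool's actual deserialization code):

static void readComparatorRefs(Configuration conf) {
    String[] refs = conf.getStrings(CONF_COMPARATOR_REFERENCES);    // e.g. "common|address", "1|postalCode"
    String[] files = conf.getStrings(CONF_COMPARATOR_INSTANCES);    // matching distributed-cache file paths
    if (refs == null || files == null) {
        return;
    }
    for (int i = 0; i < refs.length; i++) {
        String[] parts = refs[i].split("\\|");    // prefix ("common" or a schemaId) and the field name
        String prefix = parts[0];
        String field = parts[1];
        String instanceFile = files[i];
        // ... deserialize the comparator stored in instanceFile and register it for (prefix, field)
    }
}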

From source file: com.datasalt.pangool.utils.AvroUtils.java

License: Apache License

public static void addAvroSerialization(Configuration conf) {
    Collection<String> serializations = conf.getStringCollection("io.serializations");
    if (!serializations.contains(AvroSerialization.class.getName())) {
        serializations.add(AvroSerialization.class.getName());
        conf.setStrings("io.serializations", serializations.toArray(new String[0]));
}
}
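
This helper appends AvroSerialization to io.serializations instead of overwriting it, so the serializers Hadoop ships with stay available. A hedged illustration of the resulting comma-delimited value (the default list and the exact class name appended depend on the Hadoop and Pangool versions in use):

Configuration conf = new Configuration();
System.out.println(conf.get("io.serializations"));
// typically starts with org.apache.hadoop.io.serializer.WritableSerialization, ...

AvroUtils.addAvroSerialization(conf);
System.out.println(conf.get("io.serializations"));
// the same list with AvroSerialization's class name appended at the end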

From source file: com.datascience.cascading.scheme.CsvScheme.java

License: Apache License

/**
 * Configures the Hadoop configuration for the given CSV format.
 */
private void configureReaderFormat(CSVFormat format, Configuration conf) {
    conf.set(CsvOutputFormat.CHARSET, charset);

    // If the format header was explicitly provided by the user then forward it to the record reader. If skipHeaderRecord
    // is enabled then that indicates that field names were detected. We need to ensure that headers are defined in order
    // for the CSV reader to skip the header record.
    conf.setBoolean(CsvInputFormat.STRICT_MODE, strict);
    if (format.getHeader() != null) {
        conf.setStrings(CsvInputFormat.CSV_READER_COLUMNS, format.getHeader());
    } else if (format.getSkipHeaderRecord()) {
        Fields fields = getSourceFields();
        String[] columns = new String[fields.size()];
        for (int i = 0; i < fields.size(); i++) {
            columns[i] = fields.get(i).toString();
        }
        conf.setStrings(CsvInputFormat.CSV_READER_COLUMNS, columns);
    }

    conf.setBoolean(CsvInputFormat.CSV_READER_SKIP_HEADER, format.getSkipHeaderRecord());
    conf.set(CsvInputFormat.CSV_READER_DELIMITER, String.valueOf(format.getDelimiter()));

    if (format.getRecordSeparator() != null) {
        conf.set(CsvInputFormat.CSV_READER_RECORD_SEPARATOR, format.getRecordSeparator());
    }

    if (format.getQuoteCharacter() != null) {
        conf.set(CsvInputFormat.CSV_READER_QUOTE_CHARACTER, String.valueOf(format.getQuoteCharacter()));
    }

    if (format.getQuoteMode() != null) {
        conf.set(CsvInputFormat.CSV_READER_QUOTE_MODE, format.getQuoteMode().name());
    }

    if (format.getEscapeCharacter() != null) {
        conf.set(CsvInputFormat.CSV_READER_ESCAPE_CHARACTER, String.valueOf(format.getEscapeCharacter()));
    }

    conf.setBoolean(CsvInputFormat.CSV_READER_IGNORE_EMPTY_LINES, format.getIgnoreEmptyLines());
    conf.setBoolean(CsvInputFormat.CSV_READER_IGNORE_SURROUNDING_SPACES, format.getIgnoreSurroundingSpaces());

    if (format.getNullString() != null) {
        conf.set(CsvInputFormat.CSV_READER_NULL_STRING, format.getNullString());
    }
}
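
On the read side, the column names written with setStrings(CsvInputFormat.CSV_READER_COLUMNS, ...) can be recovered with getStrings(). A hedged sketch of how a record reader might rebuild the CSV format from the configuration (this is not the actual CsvInputFormat code):

String[] columns = conf.getStrings(CsvInputFormat.CSV_READER_COLUMNS);
boolean skipHeader = conf.getBoolean(CsvInputFormat.CSV_READER_SKIP_HEADER, false);
char delimiter = conf.get(CsvInputFormat.CSV_READER_DELIMITER, ",").charAt(0);

CSVFormat readFormat = CSVFormat.newFormat(delimiter).withSkipHeaderRecord(skipHeader);
if (columns != null) {
    readFormat = readFormat.withHeader(columns);
}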

From source file: com.datascience.cascading.scheme.CsvScheme.java

License: Apache License

/**
 * Configures the Hadoop configuration for the given CSV format.
 */
private void configureWriterFormat(CSVFormat format, Configuration conf) {
    conf.set(CsvOutputFormat.CHARSET, charset);

    // Apache CSV doesn't really handle the skipHeaderRecord flag correctly when writing output. If the skip flag is set
    // and headers are configured, headers will always be written to the output. Since we always have headers and/or
    // fields configured, we need to use the skipHeaderRecord flag to determine whether headers should be written.
    if (!format.getSkipHeaderRecord()) {
        if (format.getHeader() != null && format.getHeader().length != 0) {
            conf.setStrings(CsvOutputFormat.CSV_WRITER_COLUMNS, format.getHeader());
        } else {
            Fields fields = getSinkFields();
            String[] columns = new String[fields.size()];
            for (int i = 0; i < fields.size(); i++) {
                columns[i] = fields.get(i).toString();
            }
            conf.setStrings(CsvOutputFormat.CSV_WRITER_COLUMNS, columns);
        }
    }

    conf.setBoolean(CsvOutputFormat.CSV_WRITER_SKIP_HEADER, format.getSkipHeaderRecord());
    conf.set(CsvOutputFormat.CSV_WRITER_DELIMITER, String.valueOf(format.getDelimiter()));

    if (format.getRecordSeparator() != null) {
        conf.set(CsvOutputFormat.CSV_WRITER_RECORD_SEPARATOR, format.getRecordSeparator());
    }

    if (format.getQuoteCharacter() != null) {
        conf.set(CsvOutputFormat.CSV_WRITER_QUOTE_CHARACTER, String.valueOf(format.getQuoteCharacter()));
    }

    if (format.getQuoteMode() != null) {
        conf.set(CsvOutputFormat.CSV_WRITER_QUOTE_MODE, format.getQuoteMode().name());
    }

    if (format.getEscapeCharacter() != null) {
        conf.set(CsvOutputFormat.CSV_WRITER_ESCAPE_CHARACTER, String.valueOf(format.getEscapeCharacter()));
    }

    conf.setBoolean(CsvOutputFormat.CSV_WRITER_IGNORE_EMPTY_LINES, format.getIgnoreEmptyLines());
    conf.setBoolean(CsvOutputFormat.CSV_WRITER_IGNORE_SURROUNDING_SPACES, format.getIgnoreSurroundingSpaces());

    if (format.getNullString() != null) {
        conf.set(CsvOutputFormat.CSV_WRITER_NULL_STRING, format.getNullString());
    }
}
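
One caveat that applies to both the reader and writer variants: setStrings() stores the array as a comma-delimited string, so (assuming no escaping is applied) a column name that itself contains a comma is split into two values when read back. A small illustration with a made-up property name:

Configuration conf = new Configuration();
conf.setStrings("columns", "id", "name, surname", "age");
String[] readBack = conf.getStrings("columns");
System.out.println(readBack.length);    // 4, not 3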

From source file: com.datascience.hadoop.CsvHelper.java

License: Apache License

public Configuration buildConfiguration(String delimiter, String skipHeader, String recordSeparator,
        String[] columns) {
    Configuration conf = new Configuration();
    conf.set("fs.default.name", "file:///");
    conf.set(CsvInputFormat.CSV_READER_DELIMITER, delimiter);
    conf.set(CsvInputFormat.CSV_READER_SKIP_HEADER, skipHeader);
    conf.set(CsvInputFormat.CSV_READER_RECORD_SEPARATOR, recordSeparator);
    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, FileSystem.DEFAULT_FS);
    conf.setStrings(CsvInputFormat.CSV_READER_COLUMNS, columns);
    conf.set("io.compression.codecs",
            "org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.DeflateCodec,org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.Lz4Codec");
    return conf;
}
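
A hedged usage sketch of the helper above; the argument values are hypothetical and a no-argument CsvHelper constructor is assumed:

CsvHelper helper = new CsvHelper();
Configuration conf = helper.buildConfiguration(",", "true", "\n",
        new String[] { "id", "first_name", "last_name" });

// setStrings() stored the column names as one comma-delimited property.
System.out.println(conf.get(CsvInputFormat.CSV_READER_COLUMNS));    // id,first_name,last_name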

From source file: com.elex.dmp.lda.CVB0Driver.java

License: Apache License

private static void setModelPaths(Job job, Path modelPath) throws IOException {
    Configuration conf = job.getConfiguration();
    if (modelPath == null || !FileSystem.get(modelPath.toUri(), conf).exists(modelPath)) {
        return;
    }
    FileStatus[] statuses = FileSystem.get(modelPath.toUri(), conf).listStatus(modelPath,
            PathFilters.partFilter());
    Preconditions.checkState(statuses.length > 0, "No part files found in model path '%s'",
            modelPath.toString());
    String[] modelPaths = new String[statuses.length];
    for (int i = 0; i < statuses.length; i++) {
        modelPaths[i] = statuses[i].getPath().toUri().toString();
    }
    conf.setStrings(MODEL_PATHS, modelPaths);
}
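
The matching read side can turn the comma-delimited property back into Path objects with getStrings(). A hedged sketch, not necessarily the driver's actual code:

static Path[] getModelPaths(Configuration conf) {
    String[] modelPaths = conf.getStrings(MODEL_PATHS);
    if (modelPaths == null || modelPaths.length == 0) {
        return new Path[0];
    }
    Path[] paths = new Path[modelPaths.length];
    for (int i = 0; i < modelPaths.length; i++) {
        paths[i] = new Path(modelPaths[i]);
    }
    return paths;
}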

From source file: com.facebook.hiveio.mapreduce.output.WritingTool.java

License: Apache License

/**
 * Adds values to a collection-valued configuration property.
 *
 * @param conf   Configuration
 * @param name   property name to append to
 * @param values values to add to the collection
 */
private static void addToStringCollection(Configuration conf, String name,
        Collection<? extends String> values) {
    Collection<String> tmpfiles = conf.getStringCollection(name);
    tmpfiles.addAll(values);
    conf.setStrings(name, tmpfiles.toArray(new String[tmpfiles.size()]));
}
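
A hedged usage sketch: unlike a plain setStrings() call, this helper preserves any values already stored under the property (the property name and paths below are made up):

Configuration conf = new Configuration();
conf.setStrings("tmpjars", "hdfs:///libs/a.jar");
addToStringCollection(conf, "tmpjars", Arrays.asList("hdfs:///libs/b.jar", "hdfs:///libs/c.jar"));
System.out.println(conf.get("tmpjars"));    // hdfs:///libs/a.jar,hdfs:///libs/b.jar,hdfs:///libs/c.jar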

From source file: com.facebook.presto.hive.HdfsConfiguration.java

License: Apache License

protected Configuration createConfiguration() {
    Configuration config = new Configuration();

    if (resourcePaths != null) {
        for (String resourcePath : resourcePaths) {
            config.addResource(new Path(resourcePath));
        }
    }

    // this is to prevent dfs client from doing reverse DNS lookups to determine whether nodes are rack local
    config.setClass("topology.node.switch.mapping.impl", NoOpDNSToSwitchMapping.class,
            DNSToSwitchMapping.class);

    if (socksProxy != null) {
        config.setClass("hadoop.rpc.socket.factory.class.default", SocksSocketFactory.class,
                SocketFactory.class);
        config.set("hadoop.socks.server", socksProxy.toString());
    }

    if (domainSocketPath != null) {
        config.setStrings("dfs.domain.socket.path", domainSocketPath);
    }

    // only enable short circuit reads if domain socket path is properly configured
    if (!config.get("dfs.domain.socket.path", "").trim().isEmpty()) {
        config.setBooleanIfUnset("dfs.client.read.shortcircuit", true);
    }

    config.setInt("dfs.socket.timeout", Ints.checkedCast(dfsTimeout.toMillis()));
    config.setInt("ipc.ping.interval", Ints.checkedCast(dfsTimeout.toMillis()));
    config.setInt("ipc.client.connect.timeout", Ints.checkedCast(dfsConnectTimeout.toMillis()));
    config.setInt("ipc.client.connect.max.retries", dfsConnectMaxRetries);

    // re-map filesystem schemes to match Amazon Elastic MapReduce
    config.set("fs.s3.impl", PrestoS3FileSystem.class.getName());
    config.set("fs.s3n.impl", PrestoS3FileSystem.class.getName());
    config.set("fs.s3bfs.impl", "org.apache.hadoop.fs.s3.S3FileSystem");

    // set AWS credentials for S3
    for (String scheme : ImmutableList.of("s3", "s3bfs", "s3n")) {
        if (s3AwsAccessKey != null) {
            config.set(format("fs.%s.awsAccessKeyId", scheme), s3AwsAccessKey);
        }
        if (s3AwsSecretKey != null) {
            config.set(format("fs.%s.awsSecretAccessKey", scheme), s3AwsSecretKey);
        }
    }

    // set config for S3
    config.setBoolean(PrestoS3FileSystem.S3_SSL_ENABLED, s3SslEnabled);
    config.setInt(PrestoS3FileSystem.S3_MAX_CLIENT_RETRIES, s3MaxClientRetries);
    config.setInt(PrestoS3FileSystem.S3_MAX_ERROR_RETRIES, s3MaxErrorRetries);
    config.set(PrestoS3FileSystem.S3_CONNECT_TIMEOUT, s3ConnectTimeout.toString());
    config.set(PrestoS3FileSystem.S3_STAGING_DIRECTORY, s3StagingDirectory.toString());

    updateConfiguration(config);

    return config;
}
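
Note that setStrings() is called here with a single value; in that case the stored string contains no comma and the call behaves like a plain set(), assuming the value itself contains no comma. A small illustration with an example socket path:

Configuration c = new Configuration();
c.setStrings("dfs.domain.socket.path", "/var/lib/hadoop-hdfs/dn_socket");    // example path only
// equivalent to: c.set("dfs.domain.socket.path", "/var/lib/hadoop-hdfs/dn_socket");
System.out.println(c.getStrings("dfs.domain.socket.path").length);           // 1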

From source file: com.facebook.presto.hive.HdfsConfigurationUpdater.java

License: Apache License

public void updateConfiguration(Configuration config) {
    if (resourcePaths != null) {
        for (String resourcePath : resourcePaths) {
            config.addResource(new Path(resourcePath));
        }
    }

    // this is to prevent dfs client from doing reverse DNS lookups to determine whether nodes are rack local
    config.setClass("topology.node.switch.mapping.impl", NoOpDNSToSwitchMapping.class,
            DNSToSwitchMapping.class);

    if (socksProxy != null) {
        config.setClass("hadoop.rpc.socket.factory.class.default", SocksSocketFactory.class,
                SocketFactory.class);
        config.set("hadoop.socks.server", socksProxy.toString());
    }

    if (domainSocketPath != null) {
        config.setStrings("dfs.domain.socket.path", domainSocketPath);
    }

    // only enable short circuit reads if domain socket path is properly configured
    if (!config.get("dfs.domain.socket.path", "").trim().isEmpty()) {
        config.setBooleanIfUnset("dfs.client.read.shortcircuit", true);
    }

    config.setInt("dfs.socket.timeout", toIntExact(dfsTimeout.toMillis()));
    config.setInt("ipc.ping.interval", toIntExact(ipcPingInterval.toMillis()));
    config.setInt("ipc.client.connect.timeout", toIntExact(dfsConnectTimeout.toMillis()));
    config.setInt("ipc.client.connect.max.retries", dfsConnectMaxRetries);

    // re-map filesystem schemes to match Amazon Elastic MapReduce
    config.set("fs.s3.impl", PrestoS3FileSystem.class.getName());
    config.set("fs.s3a.impl", PrestoS3FileSystem.class.getName());
    config.set("fs.s3n.impl", PrestoS3FileSystem.class.getName());
    config.set("fs.s3bfs.impl", "org.apache.hadoop.fs.s3.S3FileSystem");

    // set AWS credentials for S3
    if (s3AwsAccessKey != null) {
        config.set(PrestoS3FileSystem.S3_ACCESS_KEY, s3AwsAccessKey);
        config.set("fs.s3bfs.awsAccessKeyId", s3AwsAccessKey);
    }
    if (s3AwsSecretKey != null) {
        config.set(PrestoS3FileSystem.S3_SECRET_KEY, s3AwsSecretKey);
        config.set("fs.s3bfs.awsSecretAccessKey", s3AwsSecretKey);
    }
    if (s3Endpoint != null) {
        config.set(PrestoS3FileSystem.S3_ENDPOINT, s3Endpoint);
        config.set("fs.s3bfs.Endpoint", s3Endpoint);
    }
    if (s3SignerType != null) {
        config.set(PrestoS3FileSystem.S3_SIGNER_TYPE, s3SignerType.getSignerType());
    }

    config.setInt("fs.cache.max-size", fileSystemMaxCacheSize);

    configureCompression(config, compressionCodec);

    // set config for S3
    config.setBoolean(PrestoS3FileSystem.S3_USE_INSTANCE_CREDENTIALS, s3UseInstanceCredentials);
    config.setBoolean(PrestoS3FileSystem.S3_SSL_ENABLED, s3SslEnabled);
    config.setBoolean(PrestoS3FileSystem.S3_SSE_ENABLED, s3SseEnabled);
    if (s3EncryptionMaterialsProvider != null) {
        config.set(PrestoS3FileSystem.S3_ENCRYPTION_MATERIALS_PROVIDER, s3EncryptionMaterialsProvider);
    }
    if (s3KmsKeyId != null) {
        config.set(PrestoS3FileSystem.S3_KMS_KEY_ID, s3KmsKeyId);
    }
    config.setInt(PrestoS3FileSystem.S3_MAX_CLIENT_RETRIES, s3MaxClientRetries);
    config.setInt(PrestoS3FileSystem.S3_MAX_ERROR_RETRIES, s3MaxErrorRetries);
    config.set(PrestoS3FileSystem.S3_MAX_BACKOFF_TIME, s3MaxBackoffTime.toString());
    config.set(PrestoS3FileSystem.S3_MAX_RETRY_TIME, s3MaxRetryTime.toString());
    config.set(PrestoS3FileSystem.S3_CONNECT_TIMEOUT, s3ConnectTimeout.toString());
    config.set(PrestoS3FileSystem.S3_SOCKET_TIMEOUT, s3SocketTimeout.toString());
    config.set(PrestoS3FileSystem.S3_STAGING_DIRECTORY, s3StagingDirectory.toString());
    config.setInt(PrestoS3FileSystem.S3_MAX_CONNECTIONS, s3MaxConnections);
    config.setLong(PrestoS3FileSystem.S3_MULTIPART_MIN_FILE_SIZE, s3MultipartMinFileSize.toBytes());
    config.setLong(PrestoS3FileSystem.S3_MULTIPART_MIN_PART_SIZE, s3MultipartMinPartSize.toBytes());
    config.setBoolean(PrestoS3FileSystem.S3_PIN_CLIENT_TO_CURRENT_REGION, pinS3ClientToCurrentRegion);
    config.set(PrestoS3FileSystem.S3_USER_AGENT_PREFIX, s3UserAgentPrefix);
}

From source file: com.gemstone.gemfire.cache.hdfs.internal.HDFSStoreImpl.java

License: Apache License

private FileSystem createFileSystem(Configuration hconf, String configFile, boolean forceNew)
        throws IOException {
    FileSystem filesystem = null;

    // load hdfs client config file if specified. The path is on local file
    // system
    if (configFile != null) {
        if (logger.isDebugEnabled()) {
            logger.debug("{}Adding resource config file to hdfs configuration:" + configFile, logPrefix);
        }
        hconf.addResource(new Path(configFile));

        if (!new File(configFile).exists()) {
            logger.warn(LocalizedMessage.create(LocalizedStrings.HOPLOG_HDFS_CLIENT_CONFIG_FILE_ABSENT,
                    configFile));
        }
    }

    // This setting disables shutdown hook for file system object. Shutdown
    // hook may cause FS object to close before the cache or store and
    // unpredictable behavior. This setting is provided for GFXD like server
    // use cases where FS close is managed by a server. This setting is not
    // supported by old versions of hadoop, HADOOP-4829
    hconf.setBoolean("fs.automatic.close", false);

    // Hadoop has a configuration parameter io.serializations that is a list of serialization 
    // classes which can be used for obtaining serializers and deserializers. This parameter 
    // by default contains avro classes. When a sequence file is created, it calls 
    // SerializationFactory.getSerializer(keyclass). This internally creates objects using 
    // reflection of all the classes that were part of io.serializations. But since there is
    // no avro class available, it throws an exception.
    // Before creating a sequenceFile, override the io.serializations parameter and pass only the classes 
    // that are important to us. 
    hconf.setStrings("io.serializations",
            new String[] { "org.apache.hadoop.io.serializer.WritableSerialization" });
    // create writer

    SchemaMetrics.configureGlobally(hconf);

    String nameNodeURL = null;
    if ((nameNodeURL = getNameNodeURL()) == null) {
        nameNodeURL = hconf.get("fs.default.name");
    }

    URI namenodeURI = URI.create(nameNodeURL);

    //if (! GemFireCacheImpl.getExisting().isHadoopGfxdLonerMode()) {
    String authType = hconf.get("hadoop.security.authentication");

    //The following code handles Gemfire XD with secure HDFS
    //A static set is used to cache all known secure HDFS NameNode urls.
    UserGroupInformation.setConfiguration(hconf);

    //Compare the authentication method ignoring case to keep GFXD compliant with future versions.
    //At least version 2.0.2 starts complaining if the string "kerberos" is not in all lower case.
    //However the current version of hadoop seems to accept the authType in any case.
    if (authType.equalsIgnoreCase("kerberos")) {

        String principal = hconf.get(HoplogConfig.KERBEROS_PRINCIPAL);
        String keyTab = hconf.get(HoplogConfig.KERBEROS_KEYTAB_FILE);

        if (!PERFORM_SECURE_HDFS_CHECK) {
            if (logger.isDebugEnabled())
                logger.debug("{}Ignore secure hdfs check", logPrefix);
        } else {
            if (!secureNameNodes.contains(nameNodeURL)) {
                if (logger.isDebugEnabled())
                    logger.debug("{}Executing secure hdfs check", logPrefix);
                try {
                    filesystem = FileSystem.newInstance(namenodeURI, hconf);
                    //Make sure no IOExceptions are generated when accessing insecure HDFS. 
                    filesystem.listFiles(new Path("/"), false);
                    throw new HDFSIOException(
                            "Gemfire XD HDFS client and HDFS cluster security levels do not match. The configured HDFS Namenode is not secured.");
                } catch (IOException ex) {
                    secureNameNodes.add(nameNodeURL);
                } finally {
                    //Close filesystem to avoid resource leak
                    if (filesystem != null) {
                        closeFileSystemIgnoreError(filesystem);
                    }
                }
            }
        }

        // check to ensure the namenode principal is defined
        String nameNodePrincipal = hconf.get("dfs.namenode.kerberos.principal");
        if (nameNodePrincipal == null) {
            throw new IOException(LocalizedStrings.GF_KERBEROS_NAMENODE_PRINCIPAL_UNDEF.toLocalizedString());
        }

        // ok, the user specified a gfxd principal so we will try to login
        if (principal != null) {
            //If NameNode principal is the same as Gemfire XD principal, there is a 
            //potential security hole
            String regex = "[/@]";
            if (nameNodePrincipal != null) {
                String HDFSUser = nameNodePrincipal.split(regex)[0];
                String GFXDUser = principal.split(regex)[0];
                if (HDFSUser.equals(GFXDUser)) {
                    logger.warn(
                            LocalizedMessage.create(LocalizedStrings.HDFS_USER_IS_SAME_AS_GF_USER, GFXDUser));
                }
            }

            // a keytab must exist if the user specifies a principal
            if (keyTab == null) {
                throw new IOException(LocalizedStrings.GF_KERBEROS_KEYTAB_UNDEF.toLocalizedString());
            }

            // the keytab must exist as well
            File f = new File(keyTab);
            if (!f.exists()) {
                throw new FileNotFoundException(
                        LocalizedStrings.GF_KERBEROS_KEYTAB_FILE_ABSENT.toLocalizedString(f.getAbsolutePath()));
            }

            //Authenticate Gemfire XD principal to Kerberos KDC using Gemfire XD keytab file
            String principalWithValidHost = SecurityUtil.getServerPrincipal(principal, "");
            UserGroupInformation.loginUserFromKeytab(principalWithValidHost, keyTab);
        } else {
            logger.warn(LocalizedMessage.create(LocalizedStrings.GF_KERBEROS_PRINCIPAL_UNDEF));
        }
    }
    //}

    filesystem = getFileSystemFactory().create(namenodeURI, hconf, forceNew);

    if (logger.isDebugEnabled()) {
        logger.debug("{}Initialized FileSystem linked to " + filesystem.getUri() + " " + filesystem.hashCode(),
                logPrefix);
    }
    return filesystem;
}
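
Unlike the Pangool AvroUtils example earlier on this page, which appends to io.serializations, this method replaces the list outright so that only WritableSerialization is visible when the sequence file writer is created. A hedged illustration of the replace-versus-append difference:

Configuration hconf = new Configuration();

// Replace: only the listed class remains afterwards.
hconf.setStrings("io.serializations",
        "org.apache.hadoop.io.serializer.WritableSerialization");

// Append: read the current list, add to it, write it back.
Collection<String> serializations = hconf.getStringCollection("io.serializations");
serializations.add("org.apache.hadoop.io.serializer.JavaSerialization");
hconf.setStrings("io.serializations", serializations.toArray(new String[0]));

System.out.println(hconf.get("io.serializations"));
// org.apache.hadoop.io.serializer.WritableSerialization,org.apache.hadoop.io.serializer.JavaSerialization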