List of usage examples for org.apache.hadoop.conf Configuration setStrings
public void setStrings(String name, String... values)
Sets the array of string values for the name property, stored as comma-delimited values.
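Before the source-file examples below, a minimal sketch of the round trip (the property name demo.hosts and the class name are hypothetical, chosen only for illustration): the values passed to setStrings are stored under one key as a comma-delimited string, and getStrings/getTrimmedStrings split that string back into an array.

import org.apache.hadoop.conf.Configuration;

public class SetStringsExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Store several values under a single key; Configuration joins them as a comma-delimited string.
        conf.setStrings("demo.hosts", "node1.example.com", "node2.example.com", "node3.example.com");
        // Read the values back; getTrimmedStrings splits the stored value on commas and trims whitespace.
        for (String host : conf.getTrimmedStrings("demo.hosts")) {
            System.out.println(host);
        }
    }
}

Because the values are persisted as one comma-delimited string, values that themselves contain commas are not preserved reliably; keep that in mind when choosing what to store this way.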
From source file:com.datasalt.pangool.tuplemr.TupleMRConfig.java
License:Apache License
/**
 * Serializes the custom comparators. It uses the distributed cache
 * serialization.<br>
 * Two config properties are used. The first for storing the reference. For
 * example "common|address" refers to the sort comparator for address in the
 * common order. "1|postalCode" refers to the sort comparator for the specific
 * sort on field1 for the schema with schemaId = 1. The other config property
 * stores the instance file paths where the instances are stored in the
 * distributed cache.
 * <p>
 * Returns the instance files created.
 */
static Set<String> serializeComparators(TupleMRConfig tupleMRConfig, Configuration conf)
        throws TupleMRException {

    Set<String> instanceFiles = new HashSet<String>();

    List<String> comparatorRefs = new ArrayList<String>();
    List<String> comparatorInstanceFiles = new ArrayList<String>();

    // We use "common" as the prefix for the common criteria
    instanceFiles.addAll(serializeComparators(tupleMRConfig.getCommonCriteria(), conf, comparatorRefs,
            comparatorInstanceFiles, COMMON));

    List<Criteria> specificCriterias = tupleMRConfig.getSpecificOrderBys();
    // We use the schemaId as prefix for the specific sorting.
    for (int i = 0; i < specificCriterias.size(); i++) {
        instanceFiles.addAll(serializeComparators(specificCriterias.get(i), conf, comparatorRefs,
                comparatorInstanceFiles, i + ""));
    }

    if (comparatorRefs.size() > 0) {
        conf.setStrings(CONF_COMPARATOR_REFERENCES, comparatorRefs.toArray(new String[] {}));
        conf.setStrings(CONF_COMPARATOR_INSTANCES, comparatorInstanceFiles.toArray(new String[] {}));
    }

    return instanceFiles;
}
From source file:com.datasalt.pangool.utils.AvroUtils.java
License:Apache License
public static void addAvroSerialization(Configuration conf) {
    Collection<String> serializations = conf.getStringCollection("io.serializations");
    if (!serializations.contains(AvroSerialization.class.getName())) {
        serializations.add(AvroSerialization.class.getName());
        conf.setStrings("io.serializations", serializations.toArray(new String[0]));
    }
}
From source file:com.datascience.cascading.scheme.CsvScheme.java
License:Apache License
/**
 * Configures the Hadoop configuration for the given CSV format.
 */
private void configureReaderFormat(CSVFormat format, Configuration conf) {
    conf.set(CsvOutputFormat.CHARSET, charset);

    // If the format header was explicitly provided by the user then forward it to the record reader.
    // If skipHeaderRecord is enabled then that indicates that field names were detected. We need to
    // ensure that headers are defined in order for the CSV reader to skip the header record.
    conf.setBoolean(CsvInputFormat.STRICT_MODE, strict);
    if (format.getHeader() != null) {
        conf.setStrings(CsvInputFormat.CSV_READER_COLUMNS, format.getHeader());
    } else if (format.getSkipHeaderRecord()) {
        Fields fields = getSourceFields();
        String[] columns = new String[fields.size()];
        for (int i = 0; i < fields.size(); i++) {
            columns[i] = fields.get(i).toString();
        }
        conf.setStrings(CsvInputFormat.CSV_READER_COLUMNS, columns);
    }

    conf.setBoolean(CsvInputFormat.CSV_READER_SKIP_HEADER, format.getSkipHeaderRecord());
    conf.set(CsvInputFormat.CSV_READER_DELIMITER, String.valueOf(format.getDelimiter()));

    if (format.getRecordSeparator() != null) {
        conf.set(CsvInputFormat.CSV_READER_RECORD_SEPARATOR, format.getRecordSeparator());
    }
    if (format.getQuoteCharacter() != null) {
        conf.set(CsvInputFormat.CSV_READER_QUOTE_CHARACTER, String.valueOf(format.getQuoteCharacter()));
    }
    if (format.getQuoteMode() != null) {
        conf.set(CsvInputFormat.CSV_READER_QUOTE_MODE, format.getQuoteMode().name());
    }
    if (format.getEscapeCharacter() != null) {
        conf.set(CsvInputFormat.CSV_READER_ESCAPE_CHARACTER, String.valueOf(format.getEscapeCharacter()));
    }

    conf.setBoolean(CsvInputFormat.CSV_READER_IGNORE_EMPTY_LINES, format.getIgnoreEmptyLines());
    conf.setBoolean(CsvInputFormat.CSV_READER_IGNORE_SURROUNDING_SPACES, format.getIgnoreSurroundingSpaces());

    if (format.getNullString() != null) {
        conf.set(CsvInputFormat.CSV_READER_NULL_STRING, format.getNullString());
    }
}
From source file:com.datascience.cascading.scheme.CsvScheme.java
License:Apache License
/**
 * Configures the Hadoop configuration for the given CSV format.
 */
private void configureWriterFormat(CSVFormat format, Configuration conf) {
    conf.set(CsvOutputFormat.CHARSET, charset);

    // Apache CSV doesn't really handle the skipHeaderRecord flag correctly when writing output. If the
    // skip flag is set and headers are configured, headers will always be written to the output. Since
    // we always have headers and/or fields configured, we need to use the skipHeaderRecord flag to
    // determine whether headers should be written.
    if (!format.getSkipHeaderRecord()) {
        if (format.getHeader() != null && format.getHeader().length != 0) {
            conf.setStrings(CsvOutputFormat.CSV_WRITER_COLUMNS, format.getHeader());
        } else {
            Fields fields = getSinkFields();
            String[] columns = new String[fields.size()];
            for (int i = 0; i < fields.size(); i++) {
                columns[i] = fields.get(i).toString();
            }
            conf.setStrings(CsvOutputFormat.CSV_WRITER_COLUMNS, columns);
        }
    }

    conf.setBoolean(CsvOutputFormat.CSV_WRITER_SKIP_HEADER, format.getSkipHeaderRecord());
    conf.set(CsvOutputFormat.CSV_WRITER_DELIMITER, String.valueOf(format.getDelimiter()));

    if (format.getRecordSeparator() != null) {
        conf.set(CsvOutputFormat.CSV_WRITER_RECORD_SEPARATOR, format.getRecordSeparator());
    }
    if (format.getQuoteCharacter() != null) {
        conf.set(CsvOutputFormat.CSV_WRITER_QUOTE_CHARACTER, String.valueOf(format.getQuoteCharacter()));
    }
    if (format.getQuoteMode() != null) {
        conf.set(CsvOutputFormat.CSV_WRITER_QUOTE_MODE, format.getQuoteMode().name());
    }
    if (format.getEscapeCharacter() != null) {
        conf.set(CsvOutputFormat.CSV_WRITER_ESCAPE_CHARACTER, String.valueOf(format.getEscapeCharacter()));
    }

    conf.setBoolean(CsvOutputFormat.CSV_WRITER_IGNORE_EMPTY_LINES, format.getIgnoreEmptyLines());
    conf.setBoolean(CsvOutputFormat.CSV_WRITER_IGNORE_SURROUNDING_SPACES, format.getIgnoreSurroundingSpaces());

    if (format.getNullString() != null) {
        conf.set(CsvOutputFormat.CSV_WRITER_NULL_STRING, format.getNullString());
    }
}
From source file:com.datascience.hadoop.CsvHelper.java
License:Apache License
public Configuration buildConfiguration(String delimiter, String skipHeader, String recordSeparator,
        String[] columns) {
    Configuration conf = new Configuration();
    conf.set("fs.default.name", "file:///");
    conf.set(CsvInputFormat.CSV_READER_DELIMITER, delimiter);
    conf.set(CsvInputFormat.CSV_READER_SKIP_HEADER, skipHeader);
    conf.set(CsvInputFormat.CSV_READER_RECORD_SEPARATOR, recordSeparator);
    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, FileSystem.DEFAULT_FS);
    conf.setStrings(CsvInputFormat.CSV_READER_COLUMNS, columns);
    conf.set("io.compression.codecs",
            "org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.DeflateCodec,org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.Lz4Codec");
    return conf;
}
From source file:com.elex.dmp.lda.CVB0Driver.java
License:Apache License
private static void setModelPaths(Job job, Path modelPath) throws IOException {
    Configuration conf = job.getConfiguration();
    if (modelPath == null || !FileSystem.get(modelPath.toUri(), conf).exists(modelPath)) {
        return;
    }
    FileStatus[] statuses = FileSystem.get(modelPath.toUri(), conf).listStatus(modelPath,
            PathFilters.partFilter());
    Preconditions.checkState(statuses.length > 0, "No part files found in model path '%s'",
            modelPath.toString());
    String[] modelPaths = new String[statuses.length];
    for (int i = 0; i < statuses.length; i++) {
        modelPaths[i] = statuses[i].getPath().toUri().toString();
    }
    conf.setStrings(MODEL_PATHS, modelPaths);
}
From source file:com.facebook.hiveio.mapreduce.output.WritingTool.java
License:Apache License
/**
 * add string to collection
 *
 * @param conf Configuration
 * @param name to add
 * @param values values for collection
 */
private static void addToStringCollection(Configuration conf, String name,
        Collection<? extends String> values) {
    Collection<String> tmpfiles = conf.getStringCollection(name);
    tmpfiles.addAll(values);
    conf.setStrings(name, tmpfiles.toArray(new String[tmpfiles.size()]));
}
From source file:com.facebook.presto.hive.HdfsConfiguration.java
License:Apache License
protected Configuration createConfiguration() {
    Configuration config = new Configuration();

    if (resourcePaths != null) {
        for (String resourcePath : resourcePaths) {
            config.addResource(new Path(resourcePath));
        }
    }

    // this is to prevent dfs client from doing reverse DNS lookups to determine whether nodes are rack local
    config.setClass("topology.node.switch.mapping.impl", NoOpDNSToSwitchMapping.class, DNSToSwitchMapping.class);

    if (socksProxy != null) {
        config.setClass("hadoop.rpc.socket.factory.class.default", SocksSocketFactory.class, SocketFactory.class);
        config.set("hadoop.socks.server", socksProxy.toString());
    }

    if (domainSocketPath != null) {
        config.setStrings("dfs.domain.socket.path", domainSocketPath);
    }

    // only enable short circuit reads if domain socket path is properly configured
    if (!config.get("dfs.domain.socket.path", "").trim().isEmpty()) {
        config.setBooleanIfUnset("dfs.client.read.shortcircuit", true);
    }

    config.setInt("dfs.socket.timeout", Ints.checkedCast(dfsTimeout.toMillis()));
    config.setInt("ipc.ping.interval", Ints.checkedCast(dfsTimeout.toMillis()));
    config.setInt("ipc.client.connect.timeout", Ints.checkedCast(dfsConnectTimeout.toMillis()));
    config.setInt("ipc.client.connect.max.retries", dfsConnectMaxRetries);

    // re-map filesystem schemes to match Amazon Elastic MapReduce
    config.set("fs.s3.impl", PrestoS3FileSystem.class.getName());
    config.set("fs.s3n.impl", PrestoS3FileSystem.class.getName());
    config.set("fs.s3bfs.impl", "org.apache.hadoop.fs.s3.S3FileSystem");

    // set AWS credentials for S3
    for (String scheme : ImmutableList.of("s3", "s3bfs", "s3n")) {
        if (s3AwsAccessKey != null) {
            config.set(format("fs.%s.awsAccessKeyId", scheme), s3AwsAccessKey);
        }
        if (s3AwsSecretKey != null) {
            config.set(format("fs.%s.awsSecretAccessKey", scheme), s3AwsSecretKey);
        }
    }

    // set config for S3
    config.setBoolean(PrestoS3FileSystem.S3_SSL_ENABLED, s3SslEnabled);
    config.setInt(PrestoS3FileSystem.S3_MAX_CLIENT_RETRIES, s3MaxClientRetries);
    config.setInt(PrestoS3FileSystem.S3_MAX_ERROR_RETRIES, s3MaxErrorRetries);
    config.set(PrestoS3FileSystem.S3_CONNECT_TIMEOUT, s3ConnectTimeout.toString());
    config.set(PrestoS3FileSystem.S3_STAGING_DIRECTORY, s3StagingDirectory.toString());

    updateConfiguration(config);

    return config;
}
From source file:com.facebook.presto.hive.HdfsConfigurationUpdater.java
License:Apache License
public void updateConfiguration(Configuration config) {
    if (resourcePaths != null) {
        for (String resourcePath : resourcePaths) {
            config.addResource(new Path(resourcePath));
        }
    }

    // this is to prevent dfs client from doing reverse DNS lookups to determine whether nodes are rack local
    config.setClass("topology.node.switch.mapping.impl", NoOpDNSToSwitchMapping.class, DNSToSwitchMapping.class);

    if (socksProxy != null) {
        config.setClass("hadoop.rpc.socket.factory.class.default", SocksSocketFactory.class, SocketFactory.class);
        config.set("hadoop.socks.server", socksProxy.toString());
    }

    if (domainSocketPath != null) {
        config.setStrings("dfs.domain.socket.path", domainSocketPath);
    }

    // only enable short circuit reads if domain socket path is properly configured
    if (!config.get("dfs.domain.socket.path", "").trim().isEmpty()) {
        config.setBooleanIfUnset("dfs.client.read.shortcircuit", true);
    }

    config.setInt("dfs.socket.timeout", toIntExact(dfsTimeout.toMillis()));
    config.setInt("ipc.ping.interval", toIntExact(ipcPingInterval.toMillis()));
    config.setInt("ipc.client.connect.timeout", toIntExact(dfsConnectTimeout.toMillis()));
    config.setInt("ipc.client.connect.max.retries", dfsConnectMaxRetries);

    // re-map filesystem schemes to match Amazon Elastic MapReduce
    config.set("fs.s3.impl", PrestoS3FileSystem.class.getName());
    config.set("fs.s3a.impl", PrestoS3FileSystem.class.getName());
    config.set("fs.s3n.impl", PrestoS3FileSystem.class.getName());
    config.set("fs.s3bfs.impl", "org.apache.hadoop.fs.s3.S3FileSystem");

    // set AWS credentials for S3
    if (s3AwsAccessKey != null) {
        config.set(PrestoS3FileSystem.S3_ACCESS_KEY, s3AwsAccessKey);
        config.set("fs.s3bfs.awsAccessKeyId", s3AwsAccessKey);
    }
    if (s3AwsSecretKey != null) {
        config.set(PrestoS3FileSystem.S3_SECRET_KEY, s3AwsSecretKey);
        config.set("fs.s3bfs.awsSecretAccessKey", s3AwsSecretKey);
    }
    if (s3Endpoint != null) {
        config.set(PrestoS3FileSystem.S3_ENDPOINT, s3Endpoint);
        config.set("fs.s3bfs.Endpoint", s3Endpoint);
    }
    if (s3SignerType != null) {
        config.set(PrestoS3FileSystem.S3_SIGNER_TYPE, s3SignerType.getSignerType());
    }

    config.setInt("fs.cache.max-size", fileSystemMaxCacheSize);

    configureCompression(config, compressionCodec);

    // set config for S3
    config.setBoolean(PrestoS3FileSystem.S3_USE_INSTANCE_CREDENTIALS, s3UseInstanceCredentials);
    config.setBoolean(PrestoS3FileSystem.S3_SSL_ENABLED, s3SslEnabled);
    config.setBoolean(PrestoS3FileSystem.S3_SSE_ENABLED, s3SseEnabled);
    if (s3EncryptionMaterialsProvider != null) {
        config.set(PrestoS3FileSystem.S3_ENCRYPTION_MATERIALS_PROVIDER, s3EncryptionMaterialsProvider);
    }
    if (s3KmsKeyId != null) {
        config.set(PrestoS3FileSystem.S3_KMS_KEY_ID, s3KmsKeyId);
    }
    config.setInt(PrestoS3FileSystem.S3_MAX_CLIENT_RETRIES, s3MaxClientRetries);
    config.setInt(PrestoS3FileSystem.S3_MAX_ERROR_RETRIES, s3MaxErrorRetries);
    config.set(PrestoS3FileSystem.S3_MAX_BACKOFF_TIME, s3MaxBackoffTime.toString());
    config.set(PrestoS3FileSystem.S3_MAX_RETRY_TIME, s3MaxRetryTime.toString());
    config.set(PrestoS3FileSystem.S3_CONNECT_TIMEOUT, s3ConnectTimeout.toString());
    config.set(PrestoS3FileSystem.S3_SOCKET_TIMEOUT, s3SocketTimeout.toString());
    config.set(PrestoS3FileSystem.S3_STAGING_DIRECTORY, s3StagingDirectory.toString());
    config.setInt(PrestoS3FileSystem.S3_MAX_CONNECTIONS, s3MaxConnections);
    config.setLong(PrestoS3FileSystem.S3_MULTIPART_MIN_FILE_SIZE, s3MultipartMinFileSize.toBytes());
    config.setLong(PrestoS3FileSystem.S3_MULTIPART_MIN_PART_SIZE, s3MultipartMinPartSize.toBytes());
    config.setBoolean(PrestoS3FileSystem.S3_PIN_CLIENT_TO_CURRENT_REGION, pinS3ClientToCurrentRegion);
    config.set(PrestoS3FileSystem.S3_USER_AGENT_PREFIX, s3UserAgentPrefix);
}
From source file:com.gemstone.gemfire.cache.hdfs.internal.HDFSStoreImpl.java
License:Apache License
private FileSystem createFileSystem(Configuration hconf, String configFile, boolean forceNew)
        throws IOException {
    FileSystem filesystem = null;

    // load hdfs client config file if specified. The path is on local file system
    if (configFile != null) {
        if (logger.isDebugEnabled()) {
            logger.debug("{}Adding resource config file to hdfs configuration:" + configFile, logPrefix);
        }
        hconf.addResource(new Path(configFile));

        if (!new File(configFile).exists()) {
            logger.warn(LocalizedMessage.create(LocalizedStrings.HOPLOG_HDFS_CLIENT_CONFIG_FILE_ABSENT,
                    configFile));
        }
    }

    // This setting disables shutdown hook for file system object. Shutdown
    // hook may cause FS object to close before the cache or store and
    // unpredictable behavior. This setting is provided for GFXD like server
    // use cases where FS close is managed by a server. This setting is not
    // supported by old versions of hadoop, HADOOP-4829
    hconf.setBoolean("fs.automatic.close", false);

    // Hadoop has a configuration parameter io.serializations that is a list of serialization
    // classes which can be used for obtaining serializers and deserializers. This parameter
    // by default contains avro classes. When a sequence file is created, it calls
    // SerializationFactory.getSerializer(keyclass). This internally creates objects using
    // reflection of all the classes that were part of io.serializations. But since, there is
    // no avro class available it throws an exception.
    // Before creating a sequenceFile, override the io.serializations parameter and pass only the classes
    // that are important to us.
    hconf.setStrings("io.serializations",
            new String[] { "org.apache.hadoop.io.serializer.WritableSerialization" });

    // create writer
    SchemaMetrics.configureGlobally(hconf);

    String nameNodeURL = null;
    if ((nameNodeURL = getNameNodeURL()) == null) {
        nameNodeURL = hconf.get("fs.default.name");
    }

    URI namenodeURI = URI.create(nameNodeURL);

    //if (! GemFireCacheImpl.getExisting().isHadoopGfxdLonerMode()) {
    String authType = hconf.get("hadoop.security.authentication");

    //The following code handles Gemfire XD with secure HDFS
    //A static set is used to cache all known secure HDFS NameNode urls.
    UserGroupInformation.setConfiguration(hconf);

    //Compare authentication method ignoring case to make GFXD future version complaint
    //At least version 2.0.2 starts complaining if the string "kerberos" is not in all small case.
    //However it seems current version of hadoop accept the authType in any case
    if (authType.equalsIgnoreCase("kerberos")) {

        String principal = hconf.get(HoplogConfig.KERBEROS_PRINCIPAL);
        String keyTab = hconf.get(HoplogConfig.KERBEROS_KEYTAB_FILE);

        if (!PERFORM_SECURE_HDFS_CHECK) {
            if (logger.isDebugEnabled())
                logger.debug("{}Ignore secure hdfs check", logPrefix);
        } else {
            if (!secureNameNodes.contains(nameNodeURL)) {
                if (logger.isDebugEnabled())
                    logger.debug("{}Executing secure hdfs check", logPrefix);
                try {
                    filesystem = FileSystem.newInstance(namenodeURI, hconf);
                    //Make sure no IOExceptions are generated when accessing insecure HDFS.
                    filesystem.listFiles(new Path("/"), false);
                    throw new HDFSIOException(
                            "Gemfire XD HDFS client and HDFS cluster security levels do not match. The configured HDFS Namenode is not secured.");
                } catch (IOException ex) {
                    secureNameNodes.add(nameNodeURL);
                } finally {
                    //Close filesystem to avoid resource leak
                    if (filesystem != null) {
                        closeFileSystemIgnoreError(filesystem);
                    }
                }
            }
        }

        // check to ensure the namenode principal is defined
        String nameNodePrincipal = hconf.get("dfs.namenode.kerberos.principal");
        if (nameNodePrincipal == null) {
            throw new IOException(LocalizedStrings.GF_KERBEROS_NAMENODE_PRINCIPAL_UNDEF.toLocalizedString());
        }

        // ok, the user specified a gfxd principal so we will try to login
        if (principal != null) {
            //If NameNode principal is the same as Gemfire XD principal, there is a
            //potential security hole
            String regex = "[/@]";
            if (nameNodePrincipal != null) {
                String HDFSUser = nameNodePrincipal.split(regex)[0];
                String GFXDUser = principal.split(regex)[0];
                if (HDFSUser.equals(GFXDUser)) {
                    logger.warn(LocalizedMessage.create(LocalizedStrings.HDFS_USER_IS_SAME_AS_GF_USER, GFXDUser));
                }
            }

            // a keytab must exist if the user specifies a principal
            if (keyTab == null) {
                throw new IOException(LocalizedStrings.GF_KERBEROS_KEYTAB_UNDEF.toLocalizedString());
            }

            // the keytab must exist as well
            File f = new File(keyTab);
            if (!f.exists()) {
                throw new FileNotFoundException(
                        LocalizedStrings.GF_KERBEROS_KEYTAB_FILE_ABSENT.toLocalizedString(f.getAbsolutePath()));
            }

            //Authenticate Gemfire XD principal to Kerberos KDC using Gemfire XD keytab file
            String principalWithValidHost = SecurityUtil.getServerPrincipal(principal, "");
            UserGroupInformation.loginUserFromKeytab(principalWithValidHost, keyTab);
        } else {
            logger.warn(LocalizedMessage.create(LocalizedStrings.GF_KERBEROS_PRINCIPAL_UNDEF));
        }
    }
    //}

    filesystem = getFileSystemFactory().create(namenodeURI, hconf, forceNew);

    if (logger.isDebugEnabled()) {
        logger.debug("{}Initialized FileSystem linked to " + filesystem.getUri() + " " + filesystem.hashCode(),
                logPrefix);
    }
    return filesystem;
}