List of usage examples for org.apache.hadoop.conf Configuration setStrings
public void setStrings(String name, String... values)
Sets the array of string values for the name property as comma-delimited values.

Parameters: name - the property name; values - the values to set.
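Before the collected examples, a minimal sketch of the round trip; the property name "example.colors" and its values are made up for illustration. setStrings stores the values under one comma-delimited property, and getStrings reads them back as an array.

import org.apache.hadoop.conf.Configuration;

public class SetStringsExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Stored as a single comma-delimited value: "red,green,blue".
        conf.setStrings("example.colors", "red", "green", "blue");
        // getStrings splits the property back into an array.
        String[] colors = conf.getStrings("example.colors");
        for (String color : colors) {
            System.out.println(color);
        }
    }
}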
From source file:importToTable.importDataJob.java

public void importData(String filePath, String tableName, String[] mappedTableFamilies,
        String[] mappedFamilies, String[] keys) throws Exception {
    // Use the family class to get the column names.
    family familyName = new family();
    String[] column = familyName.columnfamily(filePath);

    // MapReduce job.
    Configuration conf = new Configuration();
    conf.setStrings("column", column);
    conf.setStrings("mappedFamilies", mappedFamilies);
    conf.setStrings("mappedTableFamilies", mappedTableFamilies);
    conf.setStrings("keys", keys);
    conf.set("tableName", tableName);

    Job job = Job.getInstance(conf, "VCF");
    job.setJarByClass(importToTable.importDataJob.class);
    job.setMapperClass(importToTable.importDataMapper.class);
    job.setReducerClass(org.apache.hadoop.hbase.mapreduce.PutSortReducer.class);

    // Specify output types.
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);
    job.setOutputFormatClass(HFileOutputFormat.class);

    // Specify input and output directories (not files).
    FileInputFormat.setInputPaths(job, new Path(filePath));
    File temp = new File("/tmp/HVCF/output");
    this.deleteDir(temp);
    FileOutputFormat.setOutputPath(job, new Path("/tmp/HVCF/output"));

    Configuration hbconf = HBaseConfiguration.create();
    HTable table = new HTable(hbconf, tableName);
    HFileOutputFormat.configureIncrementalLoad(job, table);
    if (!job.waitForCompletion(true)) {
        return;
    }

    // Bulk-load the HFiles into the HBase table.
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(hbconf);
    loader.doBulkLoad(new Path("/tmp/HVCF/output"), table);
}
From source file:io.prestosql.plugin.hive.HdfsConfigurationInitializer.java
License:Apache License
public void initializeConfiguration(Configuration config) {
    copy(resourcesConfiguration, config);

    // this is to prevent dfs client from doing reverse DNS lookups to determine whether nodes are rack local
    config.setClass(NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, NoOpDNSToSwitchMapping.class,
            DNSToSwitchMapping.class);

    if (socksProxy != null) {
        config.setClass(HADOOP_RPC_SOCKET_FACTORY_CLASS_DEFAULT_KEY, SocksSocketFactory.class,
                SocketFactory.class);
        config.set(HADOOP_SOCKS_SERVER_KEY, socksProxy.toString());
    }

    if (domainSocketPath != null) {
        config.setStrings(DFS_DOMAIN_SOCKET_PATH_KEY, domainSocketPath);
    }

    // only enable short circuit reads if domain socket path is properly configured
    if (!config.get(DFS_DOMAIN_SOCKET_PATH_KEY, "").trim().isEmpty()) {
        config.setBooleanIfUnset(DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
    }

    config.setInt(DFS_CLIENT_SOCKET_TIMEOUT_KEY, toIntExact(dfsTimeout.toMillis()));
    config.setInt(IPC_PING_INTERVAL_KEY, toIntExact(ipcPingInterval.toMillis()));
    config.setInt(IPC_CLIENT_CONNECT_TIMEOUT_KEY, toIntExact(dfsConnectTimeout.toMillis()));
    config.setInt(IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, dfsConnectMaxRetries);

    if (isHdfsWireEncryptionEnabled) {
        config.set(HADOOP_RPC_PROTECTION, "privacy");
        config.setBoolean("dfs.encrypt.data.transfer", true);
    }

    config.setInt("fs.cache.max-size", fileSystemMaxCacheSize);
    config.setInt(LineRecordReader.MAX_LINE_LENGTH, textMaxLineLength);

    configureCompression(config, compressionCodec);

    s3ConfigurationUpdater.updateConfiguration(config);
    gcsConfigurationInitialize.updateConfiguration(config);
}
From source file:mvm.rya.accumulo.pig.IndexWritingTool.java
License:Apache License
public void setVarOrders(String s, Configuration conf) throws MalformedQueryException {
    SPARQLParser parser = new SPARQLParser();
    TupleExpr query = parser.parseQuery(s, null).getTupleExpr();

    List<String> projList = Lists.newArrayList(((Projection) query).getProjectionElemList().getTargetNames());
    String projElems = Joiner.on(";").join(projList);
    conf.set("projElems", projElems);

    Pattern splitPattern1 = Pattern.compile("\n");
    Pattern splitPattern2 = Pattern.compile(",");
    String[] lines = splitPattern1.split(s);

    List<String> varOrders = Lists.newArrayList();
    List<String> varOrderPos = Lists.newArrayList();

    int orderNum = 0;
    int projSizeSq = projList.size() * projList.size();

    for (String t : lines) {
        if (orderNum > projSizeSq) {
            break;
        }

        String[] order = null;
        if (t.startsWith("#prefix")) {
            t = t.substring(7).trim();
            order = splitPattern2.split(t, projList.size());
        }

        String tempVarOrder = "";
        String tempVarOrderPos = "";

        if (order != null) {
            for (String u : order) {
                if (tempVarOrder.length() == 0) {
                    tempVarOrder = u.trim();
                } else {
                    tempVarOrder = tempVarOrder + ";" + u.trim();
                }
                int pos = projList.indexOf(u.trim());
                if (pos < 0) {
                    throw new IllegalArgumentException("Invalid variable order!");
                } else {
                    if (tempVarOrderPos.length() == 0) {
                        tempVarOrderPos = tempVarOrderPos + pos;
                    } else {
                        tempVarOrderPos = tempVarOrderPos + ";" + pos;
                    }
                }
            }
            varOrders.add(tempVarOrder);
            varOrderPos.add(tempVarOrderPos);
        }

        if (tempVarOrder.length() > 0) {
            orderNum++;
        }
    }

    if (orderNum == 0) {
        varOrders.add(projElems);
        String tempVarPos = "";
        for (int i = 0; i < projList.size(); i++) {
            if (i == 0) {
                tempVarPos = Integer.toString(0);
            } else {
                tempVarPos = tempVarPos + ";" + i;
            }
        }
        varOrderPos.add(tempVarPos);
    }

    String[] vOrders = varOrders.toArray(new String[varOrders.size()]);
    String[] vOrderPos = varOrderPos.toArray(new String[varOrderPos.size()]);

    conf.setStrings("varOrders", vOrders);
    conf.setStrings("varOrderPos", vOrderPos);
}
From source file:org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator.java
License:Apache License
/**
 * Sets the input ranges to scan on all input tables for this job. If not set, the entire table will be scanned.
 *
 * @param implementingClass
 *          the class whose name will be used as a prefix for the property configuration key
 * @param conf
 *          the Hadoop configuration object to configure
 * @param ranges
 *          the ranges that will be mapped over
 * @throws IllegalArgumentException
 *           if the ranges cannot be encoded into base 64
 * @since 1.6.0
 */
public static void setRanges(Class<?> implementingClass, Configuration conf, Collection<Range> ranges) {
    checkArgument(ranges != null, "ranges is null");

    ArrayList<String> rangeStrings = new ArrayList<>(ranges.size());
    try {
        for (Range r : ranges) {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            r.write(new DataOutputStream(baos));
            rangeStrings.add(Base64.getEncoder().encodeToString(baos.toByteArray()));
        }
        conf.setStrings(enumToConfKey(implementingClass, ScanOpts.RANGES), rangeStrings.toArray(new String[0]));
    } catch (IOException ex) {
        throw new IllegalArgumentException("Unable to encode ranges to Base64", ex);
    }
}
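A brief sketch of invoking this helper directly; the implementing class and row boundaries below are assumptions chosen for illustration, and in normal use the input format classes call this on your behalf.

import java.util.Collections;
import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
import org.apache.accumulo.core.data.Range;
import org.apache.hadoop.conf.Configuration;

public class SetRangesSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // One illustrative row range; each Range is serialized and Base64-encoded into the configuration.
        Range range = new Range("row_000", "row_999");
        InputConfigurator.setRanges(AccumuloInputFormat.class, conf, Collections.singletonList(range));
    }
}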
From source file:org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator.java
License:Apache License
/**
 * Restricts the columns that will be mapped over for the single input table on this job.
 *
 * @param implementingClass
 *          the class whose name will be used as a prefix for the property configuration key
 * @param conf
 *          the Hadoop configuration object to configure
 * @param columnFamilyColumnQualifierPairs
 *          a pair of {@link Text} objects corresponding to column family and column qualifier. If the column qualifier is null, the entire column family is
 *          selected. An empty set is the default and is equivalent to scanning all columns.
 * @throws IllegalArgumentException
 *           if the column family is null
 * @since 1.6.0
 */
public static void fetchColumns(Class<?> implementingClass, Configuration conf,
        Collection<Pair<Text, Text>> columnFamilyColumnQualifierPairs) {
    checkArgument(columnFamilyColumnQualifierPairs != null, "columnFamilyColumnQualifierPairs is null");
    String[] columnStrings = serializeColumns(columnFamilyColumnQualifierPairs);
    conf.setStrings(enumToConfKey(implementingClass, ScanOpts.COLUMNS), columnStrings);
}
From source file:org.apache.accumulo.hadoopImpl.mapreduce.lib.InputConfigurator.java
License:Apache License
/**
 * Restricts the columns that will be mapped over for the single input table on this job.
 */
public static void fetchColumns(Class<?> implementingClass, Configuration conf,
        Collection<IteratorSetting.Column> columnFamilyColumnQualifierPairs) {
    checkArgument(columnFamilyColumnQualifierPairs != null, "columnFamilyColumnQualifierPairs is null");
    String[] columnStrings = serializeColumns(columnFamilyColumnQualifierPairs);
    conf.setStrings(enumToConfKey(implementingClass, ScanOpts.COLUMNS), columnStrings);
}
From source file:org.apache.ambari.TestJobHistoryParsing.java
License:Apache License
private static void setProperties(Configuration conf, String workflowId, String workflowName,
        String workflowNodeName, Map<String, String[]> adj) {
    conf.set(ID_PROP, workflowId);
    conf.set(NAME_PROP, workflowName);
    conf.set(NODE_PROP, workflowNodeName);
    for (Entry<String, String[]> entry : adj.entrySet()) {
        conf.setStrings(ADJ_PROP + "." + entry.getKey(), entry.getValue());
    }
}
From source file:org.apache.avro.hadoop.io.AvroSerialization.java
License:Apache License
/**
 * Adds the AvroSerialization scheme to the configuration, so SerializationFactory
 * instances constructed from the given configuration will be aware of it.
 *
 * @param conf The configuration to add AvroSerialization to.
 */
public static void addToConfiguration(Configuration conf) {
    Collection<String> serializations = conf.getStringCollection("io.serializations");
    if (!serializations.contains(AvroSerialization.class.getName())) {
        serializations.add(AvroSerialization.class.getName());
        conf.setStrings("io.serializations", serializations.toArray(new String[serializations.size()]));
    }
}
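A short sketch of calling this helper during job setup; the job name is arbitrary, and the addToConfiguration call is the only point of the example.

import org.apache.avro.hadoop.io.AvroSerialization;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class AvroSerializationSetup {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "avro-example");
        // Appends AvroSerialization to io.serializations if it is not already listed.
        AvroSerialization.addToConfiguration(job.getConfiguration());
    }
}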
From source file:org.apache.blur.mapreduce.lib.CsvBlurMapper.java
License:Apache License
protected static void append(Configuration configuration, String name, String value) {
    Collection<String> set = configuration.getStringCollection(name);
    if (set == null) {
        set = new TreeSet<String>();
    }
    set.add(value);
    configuration.setStrings(name, set.toArray(new String[set.size()]));
}
From source file:org.apache.blur.mapreduce.lib.CsvBlurMapper.java
License:Apache License
/**
 * Adds the column layout for the given family.
 *
 * @param configuration
 *          the configuration to apply the layout.
 * @param family
 *          the family name.
 * @param columns
 *          the column names.
 */
public static void addColumns(Configuration configuration, String family, String... columns) {
    Collection<String> families = new TreeSet<String>(configuration.getStringCollection(BLUR_CSV_FAMILIES));
    families.add(family);
    configuration.setStrings(BLUR_CSV_FAMILIES, families.toArray(new String[] {}));
    configuration.setStrings(BLUR_CSV_FAMILY_COLUMN_PREFIX + family, columns);
}
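A short sketch of declaring a column layout with this method before submitting the job; the family and column names are made up for illustration.

import org.apache.blur.mapreduce.lib.CsvBlurMapper;
import org.apache.hadoop.conf.Configuration;

public class CsvBlurLayoutSketch {
    public static void main(String[] args) {
        Configuration configuration = new Configuration();
        // Registers family "fam0" and records that its CSV columns are "col1" and "col2".
        CsvBlurMapper.addColumns(configuration, "fam0", "col1", "col2");
        // A second call adds another family to the same comma-delimited families list.
        CsvBlurMapper.addColumns(configuration, "fam1", "colA");
    }
}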