List of usage examples for org.apache.hadoop.conf.Configuration.getStrings
public String[] getStrings(String name)
Get the comma delimited values of the name property as an array of Strings. If no such property is specified, null is returned.
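Before the source-file examples below, a minimal self-contained sketch of the call; the property key my.csv.columns is a made-up name for illustration:

import org.apache.hadoop.conf.Configuration;

public class GetStringsDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // setStrings stores the values as a single comma-delimited property
        conf.setStrings("my.csv.columns", "HBASE_ROW_KEY", "info:name", "info:age");

        // getStrings splits the comma-delimited value back into a String array
        String[] columns = conf.getStrings("my.csv.columns");
        for (String column : columns) {
            System.out.println(column);
        }

        // An unset key returns null, so callers must null-check,
        // as the ImportTsv examples below do
        String[] missing = conf.getStrings("no.such.key");
        System.out.println(missing == null); // prints: true
    }
}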
From source file:ImportTsv.java
License:Apache License
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
        throws IOException, ClassNotFoundException {
    Job job = null;
    try (Connection connection = ConnectionFactory.createConnection(conf)) {
        try (Admin admin = connection.getAdmin()) {
            // Support non-XML supported characters
            // by re-encoding the passed separator as a Base64 string.
            String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
            if (actualSeparator != null) {
                conf.set(SEPARATOR_CONF_KEY, Base64.encodeBytes(actualSeparator.getBytes()));
            }

            // See if a non-default Mapper was set
            String mapperClassName = conf.get(MAPPER_CONF_KEY);
            Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;

            TableName tableName = TableName.valueOf(args[0]);
            Path inputDir = new Path(args[1]);

            // set filter
            conf.set(EASTCOM_FILTER_PARAMS, args[3]);
            conf.set(EASTCOM_FILTER_DEFINE, args[4]);

            String jobName = conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName.getNameAsString());
            job = Job.getInstance(conf, jobName);
            job.setJarByClass(mapperClass);
            FileInputFormat.setInputPaths(job, inputDir);
            job.setInputFormatClass(TextInputFormat.class);
            job.setMapperClass(mapperClass);

            String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
            String[] columns = conf.getStrings(COLUMNS_CONF_KEY);
            if (StringUtils.isNotEmpty(conf.get(CREDENTIALS_LOCATION))) {
                String fileLoc = conf.get(CREDENTIALS_LOCATION);
                Credentials cred = Credentials.readTokenStorageFile(new File(fileLoc), conf);
                job.getCredentials().addAll(cred);
            }

            if (hfileOutPath != null) {
                if (!admin.tableExists(tableName)) {
                    String errorMsg = format("Table '%s' does not exist.", tableName);
                    if ("yes".equalsIgnoreCase(conf.get(CREATE_TABLE_CONF_KEY, "yes"))) {
                        LOG.warn(errorMsg);
                        // TODO: this is backwards. Instead of depending on the existence of a table,
                        // create a sane splits file for HFileOutputFormat based on data sampling.
                        createTable(admin, tableName, columns);
                    } else {
                        LOG.error(errorMsg);
                        throw new TableNotFoundException(errorMsg);
                    }
                }
                try (HTable table = (HTable) connection.getTable(tableName)) {
                    boolean noStrict = conf.getBoolean(NO_STRICT_COL_FAMILY, false);
                    // if no.strict is false then check column family
                    if (!noStrict) {
                        ArrayList<String> unmatchedFamilies = new ArrayList<String>();
                        Set<String> cfSet = getColumnFamilies(columns);
                        HTableDescriptor tDesc = table.getTableDescriptor();
                        for (String cf : cfSet) {
                            if (tDesc.getFamily(Bytes.toBytes(cf)) == null) {
                                unmatchedFamilies.add(cf);
                            }
                        }
                        if (unmatchedFamilies.size() > 0) {
                            ArrayList<String> familyNames = new ArrayList<String>();
                            for (HColumnDescriptor family : table.getTableDescriptor().getFamilies()) {
                                familyNames.add(family.getNameAsString());
                            }
                            String msg = "Column Families " + unmatchedFamilies + " specified in "
                                    + COLUMNS_CONF_KEY + " does not match with any of the table " + tableName
                                    + " column families " + familyNames + ".\n"
                                    + "To disable column family check, use -D" + NO_STRICT_COL_FAMILY
                                    + "=true.\n";
                            usage(msg);
                            System.exit(-1);
                        }
                    }
                    job.setReducerClass(PutSortReducer.class);
                    Path outputDir = new Path(hfileOutPath);
                    FileOutputFormat.setOutputPath(job, outputDir);
                    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
                    if (mapperClass.equals(TsvImporterTextMapper.class)) {
                        job.setMapOutputValueClass(Text.class);
                        job.setReducerClass(TextSortReducer.class);
                    } else {
                        job.setMapOutputValueClass(Put.class);
                        job.setCombinerClass(PutCombiner.class);
                    }
                    HFileOutputFormat2.configureIncrementalLoad(job, table, table);
                }
            } else {
                if (!admin.tableExists(tableName)) {
                    String errorMsg = format("Table '%s' does not exist.", tableName);
                    LOG.error(errorMsg);
                    throw new TableNotFoundException(errorMsg);
                }
                if (mapperClass.equals(TsvImporterTextMapper.class)) {
                    usage(TsvImporterTextMapper.class.toString()
                            + " should not be used for non bulkloading case. use "
                            + TsvImporterMapper.class.toString()
                            + " or custom mapper whose value type is Put.");
                    System.exit(-1);
                }
                // No reducers. Just write straight to table. Call initTableReducerJob
                // to set up the TableOutputFormat.
                TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
                job.setNumReduceTasks(0);
            }
            TableMapReduceUtil.addDependencyJars(job);
            TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
                    com.google.common.base.Function.class /* Guava used by TsvParser */);
        }
    }
    return job;
}
From source file:backup.namenode.NameNodeBackupBlockCheckProcessor.java
License:Apache License
public NameNodeBackupBlockCheckProcessor(Configuration conf, NameNodeRestoreProcessor processor,
        NameNode namenode, UserGroupInformation ugi) throws Exception {
    String[] nnStorageLocations = conf.getStrings(DFS_NAMENODE_NAME_DIR);
    URI uri = new URI(nnStorageLocations[0]);
    _reportPath = new File(new File(uri.getPath()).getParent(), "backup-reports");
    _reportPath.mkdirs();
    if (!_reportPath.exists()) {
        throw new IOException("Report path " + _reportPath + " does not exist");
    }
    this.ugi = ugi;
    this.namenode = namenode;
    this.conf = conf;
    this.processor = processor;
    backupStore = BackupStore.create(BackupUtil.convert(conf));
    this.fileSystem = (DistributedFileSystem) FileSystem.get(conf);
    this.ignorePath = conf.get(DFS_BACKUP_IGNORE_PATH_FILE_KEY, DFS_BACKUP_IGNORE_PATH_FILE_DEFAULT);
    this.batchSize = conf.getInt(DFS_BACKUP_REMOTE_BACKUP_BATCH_KEY, DFS_BACKUP_REMOTE_BACKUP_BATCH_DEFAULT);
    this.checkInterval = conf.getLong(DFS_BACKUP_NAMENODE_BLOCK_CHECK_INTERVAL_KEY,
            DFS_BACKUP_NAMENODE_BLOCK_CHECK_INTERVAL_DEFAULT);
    this.initInterval = conf.getLong(DFS_BACKUP_NAMENODE_BLOCK_CHECK_INTERVAL_DELAY_KEY,
            DFS_BACKUP_NAMENODE_BLOCK_CHECK_INTERVAL_DELAY_DEFAULT);
    start();
}
From source file:bulkload.ImportTsv.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        usage("Wrong number of arguments: " + args.length);
        return -1;
    }
    setConf(HBaseConfiguration.create(getConf()));
    Configuration conf = getConf();

    // Make sure columns are specified
    String[] columns = conf.getStrings(COLUMNS_CONF_KEY);
    if (columns == null) {
        usage("No columns specified. Please specify with -D" + COLUMNS_CONF_KEY + "=...");
        return -1;
    }

    // Make sure rowkey is specified
    String rowkey = conf.get(ROWKEY_CONF_KEY);
    if (StringUtil.isEmpty(rowkey)) {
        usage("No rowkey specified or rowkey is empty. Please specify with -D" + ROWKEY_CONF_KEY + "=...");
        return -1;
    }

    // Make sure rowkey handler is specified
    String rowKeyGenerator = conf.get(ROWKEY_GENERATOR_CONF_KEY);
    if (StringUtil.isEmpty(rowKeyGenerator)) {
        usage("No rowkey_handler specified or rowkey generator is empty. Please specify with -D"
                + ROWKEY_GENERATOR_CONF_KEY + "=...");
        return -1;
    }

    // Make sure they specify exactly one column as the row key
    int rowkeysFound = 0;
    for (String col : columns) {
        String[] parts = col.split(":", 3);
        if (parts.length > 1 && rowkey.equals(parts[1])) {
            rowkeysFound++;
        }
    }
    if (rowkeysFound != 1) {
        usage("Must specify exactly one column as " + rowkey);
        return -1;
    }

    // Make sure at least one column is specified
    if (columns.length < 1) {
        usage("One or more columns in addition to the row key are required");
        System.exit(-1);
    }

    Job job = createSubmittableJob(conf, args);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.ailk.oci.ocnosql.tools.load.mutiple.MutipleColumnImportTsv.java
License:Apache License
private static void createTable(Configuration conf, String tableName) throws IOException {
    HTableDescriptor htd = new HTableDescriptor(tableName.getBytes());
    String[] columns = conf.getStrings(CommonConstants.COLUMNS);
    Set<String> cfSet = new HashSet<String>();
    for (String aColumn : columns) {
        // if (TsvParser.ROWKEY_COLUMN_SPEC.equals(aColumn)) continue;
        // we are only concerned with the first one (in case this is a cf:cq)
        cfSet.add(aColumn.split(":", 2)[0]);
    }
    for (String cf : cfSet) {
        HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toBytes(cf));
        htd.addFamily(hcd);
    }
    hbaseAdmin.createTable(htd);
}
From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java
License:Apache License
private static void createTable(Configuration conf, String tableName) throws IOException {
    HTableDescriptor htd = new HTableDescriptor(tableName.getBytes());
    String[] columns = conf.getStrings(CommonConstants.COLUMNS);
    Set<String> cfSet = new HashSet<String>();
    // collect the column family of each configured column
    for (String aColumn : columns) {
        // if (TsvParser.ROWKEY_COLUMN_SPEC.equals(aColumn)) continue;
        // we are only concerned with the first one (in case this is a cf:cq)
        cfSet.add(aColumn.split(":", 2)[0]);
    }
    for (String cf : cfSet) {
        HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toBytes(cf));
        htd.addFamily(hcd);
    }
    hbaseAdmin.createTable(htd);
}
From source file:com.asakusafw.runtime.stage.resource.StageResourceDriver.java
License:Apache License
private static ArrayList<String> restoreStrings(Configuration conf, String key) {
    assert conf != null;
    assert key != null;
    ArrayList<String> results = new ArrayList<>();
    String[] old = conf.getStrings(key);
    if (old != null && old.length >= 1) {
        Collections.addAll(results, old);
    }
    return results;
}
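As an aside, the null guard in restoreStrings can also be avoided with the overloaded getStrings(String name, String... defaultValue), which falls back to the supplied defaults when the key is unset. A minimal sketch, with an illustrative key name:

Configuration conf = new Configuration();
// "example.unset.key" is not set, so the supplied defaults come back
String[] values = conf.getStrings("example.unset.key", "a", "b");
// values is {"a", "b"}; no null check needed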
From source file:com.asp.tranlog.ImportTsv.java
License:Apache License
/**
 * Main entry point.
 *
 * @param args
 *            The command line parameters.
 * @throws Exception
 *             When running the job fails.
 */
public static void main(String[] args) throws Exception {
    System.out.println("==============================================");
    Configuration conf = HBaseConfiguration.create();
    LOG.error(PRE + "conf.toString() == " + conf.toString());

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        usage("Wrong number of arguments: " + otherArgs.length);
        System.exit(-1);
    }

    String[] columns = conf.getStrings(COLUMNS_CONF_KEY);
    if (columns == null) {
        usage("No columns specified. Please specify with -D" + COLUMNS_CONF_KEY + "=...");
        System.exit(-1);
    }

    // Make sure one or more columns are specified
    if (columns.length < 2) {
        usage("One or more columns in addition to the row key are required");
        System.exit(-1);
    }

    columns = conf.getStrings(COLUMNS_CONF_KEY);
    if (columns == null) {
        usage("One or more key columns are required");
        System.exit(-1);
    }

    Job job = createSubmittableJob(conf, otherArgs);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.asp.tranlog.TsvImporterMapper.java
License:Apache License
/**
 * Handles initializing this class with objects specific to it (i.e., the
 * parser). Common initialization that might be leveraged by a subclass is
 * done in <code>doSetup</code>. Hence a subclass may choose to override
 * this method and call <code>doSetup</code> as well before handling its
 * own custom params.
 *
 * @param context
 */
@Override
protected void setup(Context context) {
    doSetup(context);

    Configuration conf = context.getConfiguration();
    charset = conf.get(ImportTsv.CHARSET_CONF_KEY);
    parser = new ImportTsv.TsvParser(conf.get(ImportTsv.COLUMNS_CONF_KEY),
            conf.getStrings(ImportTsv.KEYCOLUMNS_CONF_KEY), separator);
    keyColIndex = parser.getRowKeyColumnIndex();
    keyColLen = parser.getRowKeyColumnLen();
    if (keyColIndex == null) {
        throw new RuntimeException("No row key column specified");
    }
    columnTypes = parser.getColType();
    if (columnTypes != null) {
        colDatetimeFormater = new char[columnTypes.length];
        for (int i = 0; i < columnTypes.length; i++) {
            colDatetimeFormater[i] = 0;
        }
    }
}
From source file:com.atlantbh.nutch.filter.xpath.DOMContentUtils.java
License:Apache License
public void setConf(Configuration conf) {
    // forceTags is used to override configurable tag ignoring, later on
    Collection<String> forceTags = new ArrayList<String>(1);

    this.conf = conf;
    linkParams.clear();
    linkParams.put("a", new LinkParams("a", "href", 1));
    linkParams.put("area", new LinkParams("area", "href", 0));
    if (conf.getBoolean("parser.html.form.use_action", true)) {
        linkParams.put("form", new LinkParams("form", "action", 1));
        if (conf.get("parser.html.form.use_action") != null) {
            forceTags.add("form");
        }
    }
    linkParams.put("frame", new LinkParams("frame", "src", 0));
    linkParams.put("iframe", new LinkParams("iframe", "src", 0));
    linkParams.put("script", new LinkParams("script", "src", 0));
    linkParams.put("link", new LinkParams("link", "href", 0));
    linkParams.put("img", new LinkParams("img", "src", 0));

    // remove unwanted link tags from the linkParams map
    String[] ignoreTags = conf.getStrings("parser.html.outlinks.ignore_tags");
    for (int i = 0; ignoreTags != null && i < ignoreTags.length; i++) {
        if (!forceTags.contains(ignoreTags[i])) {
            linkParams.remove(ignoreTags[i]);
        }
    }
}
From source file:com.bah.culvert.hive.CulvertHiveUtils.java
License:Apache License
/**
 * Get the hive column names in the conf. Corresponds 1-1 to the mappings.
 *
 * @param conf The configuration to get the names out of.
 * @return The hive column names.
 */
public static String[] getHiveColumnNamesFromConf(Configuration conf) {
    return conf.getStrings(CULVERT_HIVE_COLUMN_NAMES_CONF_KEY);
}