List of usage examples for org.apache.hadoop.conf Configuration setStrings
public void setStrings(String name, String... values)
Set the array of string values for the name property as comma delimited values.
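The values are joined into a single comma delimited string under the hood, and Configuration.getStrings splits that string back into an array, so individual values must not themselves contain commas. A minimal, self-contained sketch of that round trip (the property name is illustrative):

import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;

public class SetStringsRoundTrip {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Stored under the property as the single value "id,name,email".
        conf.setStrings("export.anon.fields", "id", "name", "email");
        System.out.println(conf.get("export.anon.fields"));

        // getStrings() splits the comma delimited value back into an array.
        System.out.println(Arrays.toString(conf.getStrings("export.anon.fields")));
    }
}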
From source file: org.schedoscope.export.BaseExportJob.java
License: Apache License

protected Configuration configureAnonFields(Configuration conf) {
    conf.setStrings(EXPORT_ANON_FIELDS, anonFields);
    conf.set(EXPORT_ANON_SALT, exportSalt);
    return conf;
}
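On the task side, a field list written this way is typically read back with getStrings, which also accepts a default for the unset case. A hedged sketch of such a reader; the key is passed in rather than taken from the BaseExportJob constants, and the helper itself is illustrative:

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;

public class AnonFieldLookup {

    /** Reads a field list written with setStrings into a Set for membership checks. */
    public static Set<String> readFields(Configuration conf, String key) {
        // The vararg default is returned as-is when the property is unset.
        String[] fields = conf.getStrings(key, new String[0]);
        return new HashSet<String>(Arrays.asList(fields));
    }
}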
From source file: org.schedoscope.export.ftp.outputformat.FtpUploadOutputFormat.java
License: Apache License

/**
 * A method to configure the output format.
 *
 * @param job The job object.
 * @param tableName The Hive input table name
 * @param printHeader A flag indicating to print a csv header or not.
 * @param delimiter The delimiter to use for separating the records (CSV)
 * @param fileType The file type (csv / json)
 * @param codec The compression codec (none / gzip / bzip2)
 * @param ftpEndpoint The (s)ftp endpoint.
 * @param ftpUser The (s)ftp user
 * @param ftpPass The (s)ftp password or sftp passphrase
 * @param keyFile The private ssh key file
 * @param filePrefix An optional file prefix
 * @param passiveMode Passive mode or not (only ftp)
 * @param userIsRoot User dir is root or not
 * @param cleanHdfsDir Clean up HDFS temporary files.
 * @throws Exception Is thrown if an error occurs.
 */
public static void setOutput(Job job, String tableName, boolean printHeader, String delimiter,
        FileOutputType fileType, FileCompressionCodec codec, String ftpEndpoint, String ftpUser, String ftpPass,
        String keyFile, String filePrefix, boolean passiveMode, boolean userIsRoot, boolean cleanHdfsDir)
        throws Exception {

    Configuration conf = job.getConfiguration();

    String tmpDir = conf.get("hadoop.tmp.dir");
    String localTmpDir = RandomStringUtils.randomNumeric(10);
    setOutputPath(job, new Path(tmpDir, FTP_EXPORT_TMP_OUTPUT_PATH + localTmpDir));

    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 2);
    conf.set(FTP_EXPORT_TABLE_NAME, tableName);
    conf.set(FTP_EXPORT_ENDPOINT, ftpEndpoint);
    conf.set(FTP_EXPORT_USER, ftpUser);

    if (ftpPass != null) {
        conf.set(FTP_EXPORT_PASS, ftpPass);
    }

    if (delimiter != null) {
        if (delimiter.length() != 1) {
            throw new IllegalArgumentException("delimiter must be exactly 1 char");
        }
        conf.set(FTP_EXPORT_CVS_DELIMITER, delimiter);
    }

    if (keyFile != null && Files.exists(Paths.get(keyFile))) {
        // Uploader.checkPrivateKey(keyFile);
        String privateKey = new String(Files.readAllBytes(Paths.get(keyFile)), StandardCharsets.US_ASCII);
        conf.set(FTP_EXPORT_KEY_FILE_CONTENT, privateKey);
    }

    conf.setBoolean(FTP_EXPORT_PASSIVE_MODE, passiveMode);
    conf.setBoolean(FTP_EXPORT_USER_IS_ROOT, userIsRoot);
    conf.setBoolean(FTP_EXPORT_CLEAN_HDFS_DIR, cleanHdfsDir);

    DateTimeFormatter fmt = ISODateTimeFormat.basicDateTimeNoMillis();
    String timestamp = fmt.print(DateTime.now(DateTimeZone.UTC));
    conf.set(FTP_EXPORT_FILE_PREFIX, filePrefix + "-" + timestamp + "-");

    if (printHeader) {
        conf.setStrings(FTP_EXPORT_HEADER_COLUMNS, setCSVHeader(conf));
    }

    conf.set(FTP_EXPORT_FILE_TYPE, fileType.toString());

    if (codec.equals(FileCompressionCodec.gzip)) {
        setOutputCompressorClass(job, GzipCodec.class);
    } else if (codec.equals(FileCompressionCodec.bzip2)) {
        setOutputCompressorClass(job, BZip2Codec.class);
    } else if (codec.equals(FileCompressionCodec.none)) {
        extension = "";
    }
}
From source file: org.schedoscope.export.HiveUnitBaseTest.java
License: Apache License

public void setUpHiveServer(String dataFile, String hiveScript, String tableName) throws Exception {

    // load data into hive table
    File inputRawData = new File(dataFile);
    String inputRawDataAbsFilePath = inputRawData.getAbsolutePath();
    Map<String, String> params = new HashMap<String, String>();
    params.put(DATA_FILE_PATH, inputRawDataAbsFilePath);
    List<String> results = testSuite.executeScript(hiveScript, params);
    assertNotEquals(0, results.size());

    // set up database related settings
    Configuration conf = testSuite.getFS().getConf();
    conf.set(Schema.JDBC_CONNECTION_STRING, DEFAULT_DERBY_DB);
    Schema schema = SchemaFactory.getSchema(conf);

    // set up column type mapping
    HCatInputFormat.setInput(conf, DEFAUlT_HIVE_DB, tableName);
    hcatInputSchema = HCatInputFormat.getTableSchema(conf);
    conf.setStrings(Schema.JDBC_OUTPUT_COLUMN_TYPES,
            SchemaUtils.getColumnTypesFromHcatSchema(hcatInputSchema, schema, new HashSet<String>(0)));

    // set up hcatalog record reader
    ReadEntity.Builder builder = new ReadEntity.Builder();
    ReadEntity entity = builder.withDatabase(DEFAUlT_HIVE_DB).withTable(tableName).build();
    Map<String, String> config = new HashMap<String, String>();
    HCatReader masterReader = DataTransferFactory.getHCatReader(entity, config);
    ReaderContext ctx = masterReader.prepareRead();
    hcatRecordReader = DataTransferFactory.getHCatReader(ctx, 0);
}
From source file: org.seqdoop.hadoop_bam.cli.plugins.FixMate.java
License: Open Source License

@Override
protected int run(CmdLineParser parser) {
    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("fixmate :: WORKDIR not given.");
        return 3;
    }
    if (args.size() == 1) {
        System.err.println("fixmate :: INPATH not given.");
        return 3;
    }
    if (!cacheAndSetProperties(parser))
        return 3;

    final ValidationStringency stringency = Utils.toStringency(
            parser.getOptionValue(stringencyOpt, ValidationStringency.DEFAULT_STRINGENCY.toString()), "fixmate");
    if (stringency == null)
        return 3;

    Path wrkDir = new Path(args.get(0));

    final List<String> strInputs = args.subList(1, args.size());
    final List<Path> inputs = new ArrayList<Path>(strInputs.size());
    for (final String in : strInputs)
        inputs.add(new Path(in));

    final Configuration conf = getConf();

    // Used by Utils.getMergeableWorkFile() to name the output files.
    final String intermediateOutName = (outPath == null ? inputs.get(0) : outPath).getName();
    conf.set(Utils.WORK_FILENAME_PROPERTY, intermediateOutName);

    if (stringency != null)
        conf.set(SAMHeaderReader.VALIDATION_STRINGENCY_PROPERTY, stringency.toString());

    final boolean globalSort = parser.getBoolean(sortOpt);
    if (globalSort)
        Utils.setHeaderMergerSortOrder(conf, SAMFileHeader.SortOrder.queryname);

    conf.setStrings(Utils.HEADERMERGER_INPUTS_PROPERTY, strInputs.toArray(new String[0]));

    final Timer t = new Timer();
    try {
        // Required for path ".", for example.
        wrkDir = wrkDir.getFileSystem(conf).makeQualified(wrkDir);

        if (globalSort)
            Utils.configureSampling(wrkDir, intermediateOutName, conf);

        final Job job = new Job(conf);

        job.setJarByClass(FixMate.class);
        job.setMapperClass(FixMateMapper.class);
        job.setReducerClass(FixMateReducer.class);
        if (!parser.getBoolean(noCombinerOpt))
            job.setCombinerClass(FixMateReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(SAMRecordWritable.class);

        job.setInputFormatClass(AnySAMInputFormat.class);
        job.setOutputFormatClass(CLIMergingAnySAMOutputFormat.class);

        for (final Path in : inputs)
            FileInputFormat.addInputPath(job, in);

        FileOutputFormat.setOutputPath(job, wrkDir);

        if (globalSort) {
            job.setPartitionerClass(TotalOrderPartitioner.class);

            System.out.println("fixmate :: Sampling...");
            t.start();

            InputSampler.<LongWritable, SAMRecordWritable>writePartitionFile(job,
                    new InputSampler.RandomSampler<LongWritable, SAMRecordWritable>(0.01, 10000,
                            Math.max(100, reduceTasks)));

            System.out.printf("fixmate :: Sampling complete in %d.%03d s.\n", t.stopS(), t.fms());
        }

        job.submit();

        System.out.println("fixmate :: Waiting for job completion...");
        t.start();

        if (!job.waitForCompletion(verbose)) {
            System.err.println("fixmate :: Job failed.");
            return 4;
        }

        System.out.printf("fixmate :: Job complete in %d.%03d s.\n", t.stopS(), t.fms());

    } catch (IOException e) {
        System.err.printf("fixmate :: Hadoop error: %s\n", e);
        return 4;
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }

    if (outPath != null)
        try {
            Utils.mergeSAMInto(outPath, wrkDir, "", "", samFormat, conf, "fixmate");
        } catch (IOException e) {
            System.err.printf("fixmate :: Output merging failed: %s\n", e);
            return 5;
        }

    return 0;
}
From source file: org.seqdoop.hadoop_bam.cli.plugins.Sort.java
License: Open Source License

@Override
protected int run(CmdLineParser parser) {
    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("sort :: WORKDIR not given.");
        return 3;
    }
    if (args.size() == 1) {
        System.err.println("sort :: INPATH not given.");
        return 3;
    }
    if (!cacheAndSetProperties(parser))
        return 3;

    final ValidationStringency stringency = Utils.toStringency(
            parser.getOptionValue(stringencyOpt, ValidationStringency.DEFAULT_STRINGENCY.toString()), "sort");
    if (stringency == null)
        return 3;

    Path wrkDir = new Path(args.get(0));

    final List<String> strInputs = args.subList(1, args.size());
    final List<Path> inputs = new ArrayList<Path>(strInputs.size());
    for (final String in : strInputs)
        inputs.add(new Path(in));

    final Configuration conf = getConf();

    Utils.setHeaderMergerSortOrder(conf, SortOrder.coordinate);
    conf.setStrings(Utils.HEADERMERGER_INPUTS_PROPERTY, strInputs.toArray(new String[0]));

    if (stringency != null)
        conf.set(SAMHeaderReader.VALIDATION_STRINGENCY_PROPERTY, stringency.toString());

    // Used by Utils.getMergeableWorkFile() to name the output files.
    final String intermediateOutName = (outPath == null ? inputs.get(0) : outPath).getName();
    conf.set(Utils.WORK_FILENAME_PROPERTY, intermediateOutName);

    final Timer t = new Timer();
    try {
        // Required for path ".", for example.
        wrkDir = wrkDir.getFileSystem(conf).makeQualified(wrkDir);

        Utils.configureSampling(wrkDir, intermediateOutName, conf);

        final Job job = new Job(conf);

        job.setJarByClass(Sort.class);
        job.setMapperClass(Mapper.class);
        job.setReducerClass(SortReducer.class);

        job.setMapOutputKeyClass(LongWritable.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(SAMRecordWritable.class);

        job.setInputFormatClass(SortInputFormat.class);
        job.setOutputFormatClass(CLIMergingAnySAMOutputFormat.class);

        for (final Path in : inputs)
            FileInputFormat.addInputPath(job, in);

        FileOutputFormat.setOutputPath(job, wrkDir);

        job.setPartitionerClass(TotalOrderPartitioner.class);

        System.out.println("sort :: Sampling...");
        t.start();

        InputSampler.<LongWritable, SAMRecordWritable>writePartitionFile(job,
                new InputSampler.RandomSampler<LongWritable, SAMRecordWritable>(0.01, 10000,
                        Math.max(100, reduceTasks)));

        System.out.printf("sort :: Sampling complete in %d.%03d s.\n", t.stopS(), t.fms());

        job.submit();

        System.out.println("sort :: Waiting for job completion...");
        t.start();

        if (!job.waitForCompletion(verbose)) {
            System.err.println("sort :: Job failed.");
            return 4;
        }

        System.out.printf("sort :: Job complete in %d.%03d s.\n", t.stopS(), t.fms());

    } catch (IOException e) {
        System.err.printf("sort :: Hadoop error: %s\n", e);
        return 4;
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }

    if (outPath != null)
        try {
            Utils.mergeSAMInto(outPath, wrkDir, "", "", samFormat, conf, "sort");
        } catch (IOException e) {
            System.err.printf("sort :: Output merging failed: %s\n", e);
            return 5;
        }

    return 0;
}
From source file: org.springframework.data.hadoop.serialization.AbstractSequenceFileFormat.java
License: Apache License

/**
 * Adds the {@link Serialization} scheme to the configuration, so {@link SerializationFactory} instances are aware
 * of it.
 *
 * @param serializationClasses The Serialization classes to register to underlying configuration.
 */
@SuppressWarnings("rawtypes")
protected void registerSeqFileSerialization(Class<? extends Serialization>... serializationClasses) {

    Configuration conf = getConfiguration();

    Collection<String> serializations = conf.getStringCollection(HADOOP_IO_SERIALIZATIONS);

    for (Class<?> serializationClass : serializationClasses) {
        if (!serializations.contains(serializationClass.getName())) {
            serializations.add(serializationClass.getName());
        }
    }

    conf.setStrings(HADOOP_IO_SERIALIZATIONS, serializations.toArray(new String[serializations.size()]));
}
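The same read-modify-append pattern works for any list-valued property. A minimal sketch that registers Hadoop's JavaSerialization under the standard io.serializations key; the helper class is illustrative, only the getStringCollection/setStrings pairing mirrors the code above:

import java.util.Collection;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.serializer.JavaSerialization;

public class SerializationRegistrar {

    /** Appends a Serialization implementation to io.serializations if not already present. */
    public static void register(Configuration conf, Class<?> serializationClass) {
        Collection<String> serializations = conf.getStringCollection("io.serializations");
        if (!serializations.contains(serializationClass.getName())) {
            serializations.add(serializationClass.getName());
            conf.setStrings("io.serializations", serializations.toArray(new String[0]));
        }
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        register(conf, JavaSerialization.class);
        System.out.println(conf.get("io.serializations"));
    }
}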
From source file: org.trend.hgraph.mapreduce.pagerank.GetNoColumnsRows.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (null == args || args.length == 0) {
        System.err.println("no any option given !!");
        printUsage();
        return -1;
    }

    System.out.println("options:" + Arrays.toString(args));
    boolean and = true;
    String cmd = null;
    int mustStartIdx = -1;
    for (int a = 0; a < args.length; a++) {
        cmd = args[a];
        if (cmd.startsWith("-")) {
            if (mustStartIdx > -1) {
                System.err.println("option order is incorrect !!");
                printUsage();
                return -1;
            }

            if ("-a".equals(cmd)) {
                and = true;
            } else if ("-o".equals(cmd)) {
                and = false;
            } else {
                System.err.println("option is not defined !!");
                printUsage();
                return -1;
            }
        } else {
            if (mustStartIdx == -1) {
                mustStartIdx = a;
            }
        }
    }

    String tableName = args[mustStartIdx];
    String outputPath = args[mustStartIdx + 1];
    List<String> columns = new ArrayList<String>();
    for (int a = mustStartIdx + 2; a < args.length; a++) {
        columns.add(args[a]);
    }

    LOGGER.info("tableName=" + tableName);
    LOGGER.info("outputPath=" + outputPath);
    LOGGER.info("columns=" + columns);

    Configuration conf = this.getConf();
    conf.setBoolean(Mapper.AND_OR, and);
    conf.setStrings(Mapper.NO_COLUMNS, columns.toArray(new String[] {}));

    Job job = createSubmittableJob(conf, tableName, outputPath);
    boolean success = job.waitForCompletion(true);
    if (!success) {
        System.err.println("run job:" + job.getJobName() + " failed");
        return -1;
    }

    // for test
    Counter counter = job.getCounters().findCounter(
            "org.trend.hgraph.mapreduce.pagerank.GetNoColumnsRows$Mapper$Counters", "COLLECTED_ROWS");
    if (null != counter) {
        collectedRow = counter.getValue();
    }

    return 0;
}
From source file: org.voltdb.hadoop.VoltConfiguration.java
License: Open Source License

/**
 * Sets the job configuration properties that correspond to the given parameters
 *
 * @param conf a {@linkplain Configuration}
 * @param hostNames an array of host names
 * @param userName The user name for client connection
 * @param password The password for client connection
 * @param tableName destination table name
 */
public static void configureVoltDB(Configuration conf, String[] hostNames, String userName, String password,
        String tableName) {
    conf.setBoolean(MAP_SPECULATIVE_EXEC, false);
    conf.setBoolean(REDUCE_SPECULATIVE_EXEC, false);

    conf.setStrings(HOSTNAMES_PROP, hostNames);
    if (!isNullOrEmpty.apply(userName)) {
        conf.set(USERNAME_PROP, userName);
    }
    if (!isNullOrEmpty.apply(password)) {
        conf.set(PASSWORD_PROP, password);
    }
    conf.set(TABLENAME_PROP, tableName);
}
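A hedged usage sketch of the method above from a job driver; the host names, credentials, and table are placeholders, only the call shape and the String[] argument passed through to setStrings come from the source:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.voltdb.hadoop.VoltConfiguration;

public class VoltJobSetup {
    public static void main(String[] args) throws Exception {
        // Hypothetical hosts, credentials, and table name.
        Job job = Job.getInstance(new Configuration(), "volt-export");
        VoltConfiguration.configureVoltDB(job.getConfiguration(),
                new String[] { "volt-1.example.com", "volt-2.example.com" },
                "mruser", "secret", "EXPORT_TABLE");
    }
}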
From source file: org.voltdb.hadoop.VoltConfiguration.java
License: Open Source License

public static void loadVoltClientJar(Configuration conf) {
    String voltJar = ClientImpl.class.getProtectionDomain().getCodeSource().getLocation().toString();

    if (voltJar.toLowerCase().endsWith(".jar")) {
        String[] jars = conf.getStrings(TMPJARS_PROP, new String[0]);
        jars = Arrays.copyOf(jars, jars.length + 1);
        jars[jars.length - 1] = voltJar;
        conf.setStrings(TMPJARS_PROP, jars);
    }
}
From source file: partialJoin.JoinPlaner.java
License: Open Source License

private static void printNonJoinV(Configuration joinConf, String ret, String[] lines) {
    //try {
    int s = 0;
    for (int i = 0; i < join_files.length; i++) {
        if (lines[i].contains("|")) {
            if (lines[i].contains("J"))
                System.exit(1);
            String fname = lines[i].substring(0, lines[i].indexOf("|"));
            joinConf.set("input.reduceScans." + s + ".fname", fname);
            //Bytes.writeByteArray(v, Bytes.toBytes(fname));
            int id = Integer.parseInt(lines[i].substring(lines[i].indexOf("P") + 1)); //String.valueOf(lines[i].charAt(lines[i].length()-1)));
            Scan scan = getScan(id);
            joinConf.set("input.reduceScans." + s + ".startrow", Bytes.toStringBinary(scan.getStartRow()));
            //Bytes.writeByteArray(v, scan.getStartRow());
            if (scan.hasFamilies()) {
                System.out.println(Bytes.toString(scan.getFamilies()[0]));
                joinConf.set("input.reduceScans." + s + ".columns", Bytes.toString(scan.getFamilies()[0]));
                //Bytes.writeByteArray(v, scan.getFamilies()[0]); //Bytes.toBytes(getScan(id).getInputColumns()));
            } else {
                System.out.println("no");
                joinConf.set("input.reduceScans." + s + ".columns", "");
                //Bytes.writeByteArray(v, Bytes.toBytes("")); //Bytes.toBytes(getScan(id).getInputColumns()));
            }
            s++;
        }
    }
    joinConf.setStrings("input.reduceScans", s + "");
    //Bytes.writeByteArray(joinConf, Bytes.toBytes("end"));
    //} catch (IOException e) {
    //    e.printStackTrace();
    //}
}
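Note that setStrings with a single value stores it exactly like Configuration.set, so the entry count written above can be read back with getInt. A hedged sketch of how a consumer might walk the numbered scan entries; the reader class is illustrative, the property names mirror those written by printNonJoinV:

import org.apache.hadoop.conf.Configuration;

public class ReduceScanReader {

    /** Walks the scan descriptors written by printNonJoinV and prints them. */
    public static void printReduceScans(Configuration conf) {
        // "input.reduceScans" was written with setStrings(s + ""), i.e. a plain numeric string.
        int count = conf.getInt("input.reduceScans", 0);
        for (int s = 0; s < count; s++) {
            String fname = conf.get("input.reduceScans." + s + ".fname");
            String startRow = conf.get("input.reduceScans." + s + ".startrow");
            String columns = conf.get("input.reduceScans." + s + ".columns", "");
            System.out.println(fname + " " + startRow + " [" + columns + "]");
        }
    }
}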