Example usage for org.apache.hadoop.conf Configuration setStrings

Introduction

On this page you can find example usage for org.apache.hadoop.conf Configuration setStrings.

Prototype

public void setStrings(String name, String... values) 

Document

Set the array of string values for the name property as comma-delimited values.
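
For quick reference, here is a minimal, self-contained sketch of how setStrings pairs with get and getStrings. The property name my.app.hosts and the host values are made up for illustration.

import org.apache.hadoop.conf.Configuration;

public class SetStringsExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Store several values under one property; Configuration joins them into a comma-delimited string.
        conf.setStrings("my.app.hosts", "host1.example.com", "host2.example.com", "host3.example.com");

        // The raw property value is the comma-delimited form.
        System.out.println(conf.get("my.app.hosts"));
        // prints: host1.example.com,host2.example.com,host3.example.com

        // getStrings splits the value back into a String array.
        for (String host : conf.getStrings("my.app.hosts")) {
            System.out.println(host);
        }
    }
}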

Usage

From source file:org.schedoscope.export.BaseExportJob.java

License:Apache License

protected Configuration configureAnonFields(Configuration conf) {
    conf.setStrings(EXPORT_ANON_FIELDS, anonFields);
    conf.set(EXPORT_ANON_SALT, exportSalt);
    return conf;
}

From source file:org.schedoscope.export.ftp.outputformat.FtpUploadOutputFormat.java

License:Apache License

/**
 * A method to configure the output format.
 *
 * @param job          The job object.
 * @param tableName    The Hive input table name
 * @param printHeader  A flag indicating to print a csv header or not.
 * @param delimiter    The delimiter to use for separating the records (CSV)
 * @param fileType     The file type (csv / json)
 * @param codec        The compression codec (none / gzip / bzip2)
 * @param ftpEndpoint  The (s)ftp endpoint.
 * @param ftpUser      The (s)ftp user
 * @param ftpPass      The (s)ftp password or sftp passphrase
 * @param keyFile      The private ssh key file
 * @param filePrefix   An optional file prefix
 * @param passiveMode  Passive mode or not (only ftp)
 * @param userIsRoot   User dir is root or not
 * @param cleanHdfsDir Clean up HDFS temporary files.
 * @throws Exception Is thrown if an error occurs.
 */
public static void setOutput(Job job, String tableName, boolean printHeader, String delimiter,
        FileOutputType fileType, FileCompressionCodec codec, String ftpEndpoint, String ftpUser, String ftpPass,
        String keyFile, String filePrefix, boolean passiveMode, boolean userIsRoot, boolean cleanHdfsDir)
        throws Exception {

    Configuration conf = job.getConfiguration();
    String tmpDir = conf.get("hadoop.tmp.dir");
    String localTmpDir = RandomStringUtils.randomNumeric(10);
    setOutputPath(job, new Path(tmpDir, FTP_EXPORT_TMP_OUTPUT_PATH + localTmpDir));

    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 2);

    conf.set(FTP_EXPORT_TABLE_NAME, tableName);

    conf.set(FTP_EXPORT_ENDPOINT, ftpEndpoint);
    conf.set(FTP_EXPORT_USER, ftpUser);

    if (ftpPass != null) {
        conf.set(FTP_EXPORT_PASS, ftpPass);
    }

    if (delimiter != null) {
        if (delimiter.length() != 1) {
            throw new IllegalArgumentException("delimiter must be exactly 1 char");
        }
        conf.set(FTP_EXPORT_CVS_DELIMITER, delimiter);
    }

    if (keyFile != null && Files.exists(Paths.get(keyFile))) {

        // Uploader.checkPrivateKey(keyFile);
        String privateKey = new String(Files.readAllBytes(Paths.get(keyFile)), StandardCharsets.US_ASCII);
        conf.set(FTP_EXPORT_KEY_FILE_CONTENT, privateKey);
    }

    conf.setBoolean(FTP_EXPORT_PASSIVE_MODE, passiveMode);
    conf.setBoolean(FTP_EXPORT_USER_IS_ROOT, userIsRoot);
    conf.setBoolean(FTP_EXPORT_CLEAN_HDFS_DIR, cleanHdfsDir);

    DateTimeFormatter fmt = ISODateTimeFormat.basicDateTimeNoMillis();
    String timestamp = fmt.print(DateTime.now(DateTimeZone.UTC));
    conf.set(FTP_EXPORT_FILE_PREFIX, filePrefix + "-" + timestamp + "-");

    if (printHeader) {
        conf.setStrings(FTP_EXPORT_HEADER_COLUMNS, setCSVHeader(conf));
    }

    conf.set(FTP_EXPORT_FILE_TYPE, fileType.toString());

    if (codec.equals(FileCompressionCodec.gzip)) {
        setOutputCompressorClass(job, GzipCodec.class);
    } else if (codec.equals(FileCompressionCodec.bzip2)) {
        setOutputCompressorClass(job, BZip2Codec.class);
    } else if (codec.equals(FileCompressionCodec.none)) {
        extension = "";
    }
}

From source file:org.schedoscope.export.HiveUnitBaseTest.java

License:Apache License

public void setUpHiveServer(String dataFile, String hiveScript, String tableName) throws Exception {

    // load data into hive table
    File inputRawData = new File(dataFile);
    String inputRawDataAbsFilePath = inputRawData.getAbsolutePath();
    Map<String, String> params = new HashMap<String, String>();
    params.put(DATA_FILE_PATH, inputRawDataAbsFilePath);
    List<String> results = testSuite.executeScript(hiveScript, params);
    assertNotEquals(0, results.size());

    // set up database related settings
    Configuration conf = testSuite.getFS().getConf();
    conf.set(Schema.JDBC_CONNECTION_STRING, DEFAULT_DERBY_DB);
    Schema schema = SchemaFactory.getSchema(conf);

    // set up column type mapping
    HCatInputFormat.setInput(conf, DEFAUlT_HIVE_DB, tableName);
    hcatInputSchema = HCatInputFormat.getTableSchema(conf);
    conf.setStrings(Schema.JDBC_OUTPUT_COLUMN_TYPES,
            SchemaUtils.getColumnTypesFromHcatSchema(hcatInputSchema, schema, new HashSet<String>(0)));

    // set up hcatalog record reader
    ReadEntity.Builder builder = new ReadEntity.Builder();
    ReadEntity entity = builder.withDatabase(DEFAUlT_HIVE_DB).withTable(tableName).build();

    Map<String, String> config = new HashMap<String, String>();
    HCatReader masterReader = DataTransferFactory.getHCatReader(entity, config);
    ReaderContext ctx = masterReader.prepareRead();

    hcatRecordReader = DataTransferFactory.getHCatReader(ctx, 0);
}

From source file:org.seqdoop.hadoop_bam.cli.plugins.FixMate.java

License:Open Source License

@Override
protected int run(CmdLineParser parser) {
    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("fixmate :: WORKDIR not given.");
        return 3;
    }
    if (args.size() == 1) {
        System.err.println("fixmate :: INPATH not given.");
        return 3;
    }
    if (!cacheAndSetProperties(parser))
        return 3;

    final ValidationStringency stringency = Utils.toStringency(
            parser.getOptionValue(stringencyOpt, ValidationStringency.DEFAULT_STRINGENCY.toString()),
            "fixmate");
    if (stringency == null)
        return 3;

    Path wrkDir = new Path(args.get(0));

    final List<String> strInputs = args.subList(1, args.size());
    final List<Path> inputs = new ArrayList<Path>(strInputs.size());
    for (final String in : strInputs)
        inputs.add(new Path(in));

    final Configuration conf = getConf();

    // Used by Utils.getMergeableWorkFile() to name the output files.
    final String intermediateOutName = (outPath == null ? inputs.get(0) : outPath).getName();
    conf.set(Utils.WORK_FILENAME_PROPERTY, intermediateOutName);

    if (stringency != null)
        conf.set(SAMHeaderReader.VALIDATION_STRINGENCY_PROPERTY, stringency.toString());

    final boolean globalSort = parser.getBoolean(sortOpt);
    if (globalSort)
        Utils.setHeaderMergerSortOrder(conf, SAMFileHeader.SortOrder.queryname);

    conf.setStrings(Utils.HEADERMERGER_INPUTS_PROPERTY, strInputs.toArray(new String[0]));

    final Timer t = new Timer();
    try {
        // Required for path ".", for example.
        wrkDir = wrkDir.getFileSystem(conf).makeQualified(wrkDir);

        if (globalSort)
            Utils.configureSampling(wrkDir, intermediateOutName, conf);

        final Job job = new Job(conf);

        job.setJarByClass(FixMate.class);
        job.setMapperClass(FixMateMapper.class);
        job.setReducerClass(FixMateReducer.class);

        if (!parser.getBoolean(noCombinerOpt))
            job.setCombinerClass(FixMateReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(SAMRecordWritable.class);

        job.setInputFormatClass(AnySAMInputFormat.class);
        job.setOutputFormatClass(CLIMergingAnySAMOutputFormat.class);

        for (final Path in : inputs)
            FileInputFormat.addInputPath(job, in);

        FileOutputFormat.setOutputPath(job, wrkDir);

        if (globalSort) {
            job.setPartitionerClass(TotalOrderPartitioner.class);

            System.out.println("fixmate :: Sampling...");
            t.start();

            InputSampler.<LongWritable, SAMRecordWritable>writePartitionFile(job,
                    new InputSampler.RandomSampler<LongWritable, SAMRecordWritable>(0.01, 10000,
                            Math.max(100, reduceTasks)));

            System.out.printf("fixmate :: Sampling complete in %d.%03d s.\n", t.stopS(), t.fms());
        }

        job.submit();

        System.out.println("fixmate :: Waiting for job completion...");
        t.start();

        if (!job.waitForCompletion(verbose)) {
            System.err.println("fixmate :: Job failed.");
            return 4;
        }

        System.out.printf("fixmate :: Job complete in %d.%03d s.\n", t.stopS(), t.fms());

    } catch (IOException e) {
        System.err.printf("fixmate :: Hadoop error: %s\n", e);
        return 4;
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }

    if (outPath != null)
        try {
            Utils.mergeSAMInto(outPath, wrkDir, "", "", samFormat, conf, "fixmate");
        } catch (IOException e) {
            System.err.printf("fixmate :: Output merging failed: %s\n", e);
            return 5;
        }
    return 0;
}

From source file:org.seqdoop.hadoop_bam.cli.plugins.Sort.java

License:Open Source License

@Override
protected int run(CmdLineParser parser) {
    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("sort :: WORKDIR not given.");
        return 3;
    }
    if (args.size() == 1) {
        System.err.println("sort :: INPATH not given.");
        return 3;
    }
    if (!cacheAndSetProperties(parser))
        return 3;

    final ValidationStringency stringency = Utils.toStringency(
            parser.getOptionValue(stringencyOpt, ValidationStringency.DEFAULT_STRINGENCY.toString()), "sort");
    if (stringency == null)
        return 3;

    Path wrkDir = new Path(args.get(0));

    final List<String> strInputs = args.subList(1, args.size());
    final List<Path> inputs = new ArrayList<Path>(strInputs.size());
    for (final String in : strInputs)
        inputs.add(new Path(in));

    final Configuration conf = getConf();

    Utils.setHeaderMergerSortOrder(conf, SortOrder.coordinate);
    conf.setStrings(Utils.HEADERMERGER_INPUTS_PROPERTY, strInputs.toArray(new String[0]));

    if (stringency != null)
        conf.set(SAMHeaderReader.VALIDATION_STRINGENCY_PROPERTY, stringency.toString());

    // Used by Utils.getMergeableWorkFile() to name the output files.
    final String intermediateOutName = (outPath == null ? inputs.get(0) : outPath).getName();
    conf.set(Utils.WORK_FILENAME_PROPERTY, intermediateOutName);

    final Timer t = new Timer();
    try {
        // Required for path ".", for example.
        wrkDir = wrkDir.getFileSystem(conf).makeQualified(wrkDir);

        Utils.configureSampling(wrkDir, intermediateOutName, conf);

        final Job job = new Job(conf);

        job.setJarByClass(Sort.class);
        job.setMapperClass(Mapper.class);
        job.setReducerClass(SortReducer.class);

        job.setMapOutputKeyClass(LongWritable.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(SAMRecordWritable.class);

        job.setInputFormatClass(SortInputFormat.class);
        job.setOutputFormatClass(CLIMergingAnySAMOutputFormat.class);

        for (final Path in : inputs)
            FileInputFormat.addInputPath(job, in);

        FileOutputFormat.setOutputPath(job, wrkDir);

        job.setPartitionerClass(TotalOrderPartitioner.class);

        System.out.println("sort :: Sampling...");
        t.start();

        InputSampler.<LongWritable, SAMRecordWritable>writePartitionFile(job,
                new InputSampler.RandomSampler<LongWritable, SAMRecordWritable>(0.01, 10000,
                        Math.max(100, reduceTasks)));

        System.out.printf("sort :: Sampling complete in %d.%03d s.\n", t.stopS(), t.fms());

        job.submit();

        System.out.println("sort :: Waiting for job completion...");
        t.start();

        if (!job.waitForCompletion(verbose)) {
            System.err.println("sort :: Job failed.");
            return 4;
        }

        System.out.printf("sort :: Job complete in %d.%03d s.\n", t.stopS(), t.fms());

    } catch (IOException e) {
        System.err.printf("sort :: Hadoop error: %s\n", e);
        return 4;
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }

    if (outPath != null)
        try {
            Utils.mergeSAMInto(outPath, wrkDir, "", "", samFormat, conf, "sort");
        } catch (IOException e) {
            System.err.printf("sort :: Output merging failed: %s\n", e);
            return 5;
        }
    return 0;
}

From source file:org.springframework.data.hadoop.serialization.AbstractSequenceFileFormat.java

License:Apache License

/**
 * Adds the {@link Serialization} scheme to the configuration, so {@link SerializationFactory} instances are aware
 * of it.
 * 
 * @param serializationClasses The Serialization classes to register with the underlying configuration.
 */
@SuppressWarnings("rawtypes")
protected void registerSeqFileSerialization(Class<? extends Serialization>... serializationClasses) {

    Configuration conf = getConfiguration();

    Collection<String> serializations = conf.getStringCollection(HADOOP_IO_SERIALIZATIONS);

    for (Class<?> serializationClass : serializationClasses) {

        if (!serializations.contains(serializationClass.getName())) {

            serializations.add(serializationClass.getName());
        }
    }

    conf.setStrings(HADOOP_IO_SERIALIZATIONS, serializations.toArray(new String[serializations.size()]));
}

From source file:org.trend.hgraph.mapreduce.pagerank.GetNoColumnsRows.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (null == args || args.length == 0) {
        System.err.println("no any option given !!");
        printUsage();
        return -1;
    }

    System.out.println("options:" + Arrays.toString(args));
    boolean and = true;
    String cmd = null;
    int mustStartIdx = -1;
    for (int a = 0; a < args.length; a++) {
        cmd = args[a];
        if (cmd.startsWith("-")) {
            if (mustStartIdx > -1) {
                System.err.println("option order is incorrect !!");
                printUsage();
                return -1;
            }

            if ("-a".equals(cmd)) {
                and = true;
            } else if ("-o".equals(cmd)) {
                and = false;
            } else {
                System.err.println("option is not defined !!");
                printUsage();
                return -1;
            }
        } else {
            if (mustStartIdx == -1) {
                mustStartIdx = a;
            }
        }
    }

    String tableName = args[mustStartIdx];
    String outputPath = args[mustStartIdx + 1];
    List<String> columns = new ArrayList<String>();
    for (int a = mustStartIdx + 2; a < args.length; a++) {
        columns.add(args[a]);
    }

    LOGGER.info("tableName=" + tableName);
    LOGGER.info("outputPath=" + outputPath);
    LOGGER.info("columns=" + columns);

    Configuration conf = this.getConf();
    conf.setBoolean(Mapper.AND_OR, and);
    conf.setStrings(Mapper.NO_COLUMNS, columns.toArray(new String[] {}));

    Job job = createSubmittableJob(conf, tableName, outputPath);
    boolean success = job.waitForCompletion(true);
    if (!success) {
        System.err.println("run job:" + job.getJobName() + " failed");
        return -1;
    }

    // for test
    Counter counter = job.getCounters().findCounter(
            "org.trend.hgraph.mapreduce.pagerank.GetNoColumnsRows$Mapper$Counters", "COLLECTED_ROWS");
    if (null != counter) {
        collectedRow = counter.getValue();
    }

    return 0;
}

From source file:org.voltdb.hadoop.VoltConfiguration.java

License:Open Source License

/**
 * Sets the job configuration properties that correspond to the given parameters
 *
 * @param conf a {@linkplain Configuration}
 * @param hostNames an array of host names
 * @param userName  The user name for client connection
 * @param password   The password for client connection
 * @param tableName destination table name
 */
public static void configureVoltDB(Configuration conf, String[] hostNames, String userName, String password,
        String tableName) {
    conf.setBoolean(MAP_SPECULATIVE_EXEC, false);
    conf.setBoolean(REDUCE_SPECULATIVE_EXEC, false);

    conf.setStrings(HOSTNAMES_PROP, hostNames);
    if (!isNullOrEmpty.apply(userName)) {
        conf.set(USERNAME_PROP, userName);
    }
    if (!isNullOrEmpty.apply(password)) {
        conf.set(PASSWORD_PROP, password);
    }
    conf.set(TABLENAME_PROP, tableName);
}

From source file:org.voltdb.hadoop.VoltConfiguration.java

License:Open Source License

public static void loadVoltClientJar(Configuration conf) {
    String voltJar = ClientImpl.class.getProtectionDomain().getCodeSource().getLocation().toString();

    if (voltJar.toLowerCase().endsWith(".jar")) {
        String[] jars = conf.getStrings(TMPJARS_PROP, new String[0]);
        jars = Arrays.copyOf(jars, jars.length + 1);
        jars[jars.length - 1] = voltJar;
        conf.setStrings(TMPJARS_PROP, jars);
    }
}

From source file:partialJoin.JoinPlaner.java

License:Open Source License

private static void printNonJoinV(Configuration joinConf, String ret, String[] lines) {
    //try {
    int s = 0;
    for (int i = 0; i < join_files.length; i++) {
        if (lines[i].contains("|")) {
            if (lines[i].contains("J"))
                System.exit(1);
            String fname = lines[i].substring(0, lines[i].indexOf("|"));
            joinConf.set("input.reduceScans." + s + ".fname", fname);
            //Bytes.writeByteArray(v, Bytes.toBytes(fname));
            int id = Integer.parseInt(lines[i].substring(lines[i].indexOf("P") + 1));//String.valueOf(lines[i].charAt(lines[i].length()-1)));
            Scan scan = getScan(id);
            joinConf.set("input.reduceScans." + s + ".startrow", Bytes.toStringBinary(scan.getStartRow()));
            //Bytes.writeByteArray(v, scan.getStartRow());
            if (scan.hasFamilies()) {
                System.out.println(Bytes.toString(scan.getFamilies()[0]));
                joinConf.set("input.reduceScans." + s + ".columns", Bytes.toString(scan.getFamilies()[0]));
                //Bytes.writeByteArray(v, scan.getFamilies()[0]);//Bytes.toBytes(getScan(id).getInputColumns()));
            } else {
                System.out.println("no");
                joinConf.set("input.reduceScans." + s + ".columns", "");
                //Bytes.writeByteArray(v, Bytes.toBytes(""));//Bytes.toBytes(getScan(id).getInputColumns()));
            }
            s++;
        }
    }
    joinConf.setStrings("input.reduceScans", s + "");
    //Bytes.writeByteArray(joinConf, Bytes.toBytes("end"));
    //} catch (IOException e) {
    //   e.printStackTrace();
    //}
}