Example usage for org.apache.hadoop.conf Configuration setStrings

List of usage examples for org.apache.hadoop.conf Configuration setStrings

Introduction

This page lists usage examples for org.apache.hadoop.conf.Configuration.setStrings.

Prototype

public void setStrings(String name, String... values) 

Document

Set the array of string values for the name property as comma-delimited values.
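
For orientation, a minimal, self-contained sketch of the round trip (the property name example.fields and its values are chosen purely for illustration):

import org.apache.hadoop.conf.Configuration;

public class SetStringsExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Stored internally as the single comma-delimited value "a,b,c"
        conf.setStrings("example.fields", "a", "b", "c");

        // getStrings splits the comma-delimited value back into an array
        for (String value : conf.getStrings("example.fields")) {
            System.out.println(value);
        }
    }
}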

Usage

From source file:org.bgi.flexlab.gaea.tools.mapreduce.markduplicate.MarkDuplicateOptions.java

License:Open Source License

@Override
public void setHadoopConf(String[] args, Configuration conf) {
    conf.setStrings("args", args);
    Path p = new Path(this.getInput());
    try {
        fs = p.getFileSystem(conf);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    traversalInputPath(this.getInput());
}

From source file:org.bgi.flexlab.gaea.tools.mapreduce.vcf.sort.VCFSortOptions.java

License:Open Source License

@Override
public void setHadoopConf(String[] args, Configuration conf) {
    try {
        String[] otherArgs = new GenericOptionsParser(args).getRemainingArgs();
        conf.setStrings("args", otherArgs);
        conf.set(VCFOutputFormat.OUTPUT_VCF_FORMAT_PROPERTY, "VCF");
        conf.set(GaeaVCFHeader.VCF_HEADER_PROPERTY, setOutputURI("vcfHeader.obj"));
        conf.set(VCFRecordReader.CHR_ORDER_PROPERTY, setOutputURI("chrOrder.obj"));
        conf.setBoolean(KeyIgnoringVCFOutputFormat.WRITE_HEADER_PROPERTY, false);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:org.bgi.flexlab.gaea.tools.mapreduce.vcfqualitycontrol.VCFQualityControlOptions.java

License:Open Source License

@Override
public void setHadoopConf(String[] args, Configuration conf) {
    try {
        String[] otherArgs = new GenericOptionsParser(args).getRemainingArgs();
        conf.setStrings("args", otherArgs);
        conf.set(VCFOutputFormat.OUTPUT_VCF_FORMAT_PROPERTY, "VCF");
        conf.set(GaeaVCFHeader.VCF_HEADER_PROPERTY, setOutputURI("vcfHeader.obj"));
        conf.setBoolean(KeyIgnoringVCFOutputFormat.WRITE_HEADER_PROPERTY, false);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:org.calrissian.accumulorecipes.commons.hadoop.BaseQfdInputFormat.java

License:Apache License

/**
 * Sets selection fields on the current configuration.
 */
public static void setSelectFields(Configuration config, Set<String> selectFields) {

    if (selectFields != null)
        config.setStrings(SELECT_FIELDS, selectFields.toArray(new String[] {}));
}
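
The stored fields can later be recovered with getStrings. The following read-side sketch is not part of the original class and the method name getSelectFields is hypothetical, but it uses the same SELECT_FIELDS key as the example above:

public static Set<String> getSelectFields(Configuration config) {
    // getStrings splits the comma-delimited value written by setSelectFields;
    // fall back to an empty array if the property was never set
    String[] fields = config.getStrings(SELECT_FIELDS, new String[0]);
    return new HashSet<String>(Arrays.asList(fields));
}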

From source file:org.cloudgraph.hbase.mapreduce.GraphMapReduceSetup.java

License:Apache License

/**
 * Use this before submitting a graph map job. It will appropriately set up
 * the job.
 * 
 * @param query
 *          The query defining the {@link GraphMapper} input graphs.
 * @param mapper
 *          The mapper class to use.
 * @param outputKeyClass
 *          The class of the output key.
 * @param outputValueClass
 *          The class of the output value.
 * @param job
 *          The current job to adjust. Make sure the passed job is carrying
 *          all necessary HBase configuration.
 * @param addDependencyJars
 *          upload HBase jars and jars for any of the configured job classes
 *          via the distributed cache (tmpjars).
 * @throws IOException
 *           When setting up the details fails.
 */
public static void setupGraphMapperJob(Query query, Class<? extends GraphMapper> mapper,
        Class<? extends WritableComparable> outputKeyClass, Class<? extends Writable> outputValueClass, Job job,
        boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass) throws IOException {
    job.setInputFormatClass(inputFormatClass);
    if (outputValueClass != null)
        job.setMapOutputValueClass(outputValueClass);
    if (outputKeyClass != null)
        job.setMapOutputKeyClass(outputKeyClass);
    job.setMapperClass(mapper);
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));

    PlasmaType type = getRootType(query);

    Where where = query.getModel().findWhereClause();
    SelectionCollector selectionCollector = null;
    if (where != null)
        selectionCollector = new SelectionCollector(query.getModel().getSelectClause(), where, type);
    else
        selectionCollector = new SelectionCollector(query.getModel().getSelectClause(), type);
    selectionCollector.setOnlyDeclaredProperties(false);
    // FIXME: generalize collectRowKeyProperties
    for (Type t : selectionCollector.getTypes())
        collectRowKeyProperties(selectionCollector, (PlasmaType) t);

    // FIXME: just need the root table reader - remove
    DistributedGraphReader graphReader = new DistributedGraphReader(type, selectionCollector.getTypes(), null);

    HBaseFilterAssembler columnFilterAssembler = new GraphFetchColumnFilterAssembler(selectionCollector, type);
    Filter columnFilter = columnFilterAssembler.getFilter();

    From from = query.getModel().getFromClause();
    List<Scan> scans = createScans(from, where, type, columnFilter, conf);

    conf.set(GraphInputFormat.QUERY, marshal(query));
    conf.set(GraphInputFormat.ROOT_TABLE, graphReader.getRootTableReader().getTableName());

    List<String> scanStrings = new ArrayList<String>();

    for (Scan scan : scans) {
        scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME,
                Bytes.toBytes(graphReader.getRootTableReader().getTableName()));
        scanStrings.add(convertScanToString(scan));
    }
    conf.setStrings(GraphInputFormat.SCANS, scanStrings.toArray(new String[scanStrings.size()]));

    if (addDependencyJars) {
        addDependencyJars(job);
    }
    initCredentials(job);
}

From source file:org.elasticsearch.hadoop.mr.MultiOutputFormat.java

License:Apache License

public static void addOutputFormat(Configuration cfg, Class<? extends OutputFormat>... formats) {
    Collection<String> of = cfg.getStringCollection(CFG_FIELD);
    for (Class<? extends OutputFormat> format : formats) {
        of.add(format.getName());
    }
    cfg.setStrings(CFG_FIELD, StringUtils.join(of, ","));
}
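
Note that this example joins the class names itself and passes setStrings a single, already comma-delimited string, which stores the same result as passing the values individually. A possible read side, again only a sketch (the method name resolveOutputFormats is hypothetical; CFG_FIELD is the constant used above):

public static List<Class<? extends OutputFormat>> resolveOutputFormats(Configuration cfg)
        throws ClassNotFoundException {
    List<Class<? extends OutputFormat>> formats = new ArrayList<Class<? extends OutputFormat>>();
    // getStringCollection splits the comma-delimited property back into class names
    for (String className : cfg.getStringCollection(CFG_FIELD)) {
        formats.add(Class.forName(className).asSubclass(OutputFormat.class));
    }
    return formats;
}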

From source file:org.gridgain.grid.ggfs.GridGgfsHadoopFileSystemLoggerStateSelfTest.java

License:Open Source License

/**
 * Instantiate new file system.
 *
 * @return New file system.
 * @throws Exception If failed.
 */
private GridGgfsHadoopFileSystem fileSystem() throws Exception {
    Configuration fsCfg = new Configuration();

    fsCfg.addResource(U.resolveGridGainUrl("modules/core/src/test/config/hadoop/core-site-loopback.xml"));

    fsCfg.setBoolean("fs.ggfs.impl.disable.cache", true);

    if (logging)
        fsCfg.setBoolean(String.format(PARAM_GGFS_LOG_ENABLED, "ggfs:ggfs-grid@"), logging);

    fsCfg.setStrings(String.format(PARAM_GGFS_LOG_DIR, "ggfs:ggfs-grid@"), U.getGridGainHome());

    return (GridGgfsHadoopFileSystem) FileSystem.get(new URI("ggfs://ggfs:ggfs-grid@/"), fsCfg);
}

From source file:org.gridgain.grid.kernal.processors.hadoop.GridHadoopTaskExecutionSelfTest.java

License:Open Source License

/**
 * @throws Exception If failed.
 */
public void testMapRun() throws Exception {
    int lineCnt = 10000;
    String fileName = "/testFile";

    prepareFile(fileName, lineCnt);

    totalLineCnt.set(0);
    taskWorkDirs.clear();

    Configuration cfg = new Configuration();

    cfg.setStrings("fs.ggfs.impl", GridGgfsHadoopFileSystem.class.getName());

    Job job = Job.getInstance(cfg);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(TestMapper.class);

    job.setNumReduceTasks(0);

    job.setInputFormatClass(TextInputFormat.class);

    FileInputFormat.setInputPaths(job, new Path("ggfs://:" + getTestGridName(0) + "@/"));
    FileOutputFormat.setOutputPath(job, new Path("ggfs://:" + getTestGridName(0) + "@/output/"));

    job.setJarByClass(getClass());

    GridFuture<?> fut = grid(0).hadoop().submit(new GridHadoopJobId(UUID.randomUUID(), 1),
            createJobInfo(job.getConfiguration()));

    fut.get();

    assertEquals(lineCnt, totalLineCnt.get());

    assertEquals(32, taskWorkDirs.size());
}

From source file:org.gridgain.grid.kernal.processors.hadoop.GridHadoopTaskExecutionSelfTest.java

License:Open Source License

/**
 * @throws Exception If failed.
 */
public void testMapCombineRun() throws Exception {
    int lineCnt = 10001;
    String fileName = "/testFile";

    prepareFile(fileName, lineCnt);

    totalLineCnt.set(0);
    taskWorkDirs.clear();

    Configuration cfg = new Configuration();

    cfg.setStrings("fs.ggfs.impl", GridGgfsHadoopFileSystem.class.getName());
    cfg.setBoolean(MAP_WRITE, true);

    Job job = Job.getInstance(cfg);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(TestMapper.class);
    job.setCombinerClass(TestCombiner.class);
    job.setReducerClass(TestReducer.class);

    job.setNumReduceTasks(2);

    job.setInputFormatClass(TextInputFormat.class);

    FileInputFormat.setInputPaths(job, new Path("ggfs://:" + getTestGridName(0) + "@/"));
    FileOutputFormat.setOutputPath(job, new Path("ggfs://:" + getTestGridName(0) + "@/output"));

    job.setJarByClass(getClass());

    GridHadoopJobId jobId = new GridHadoopJobId(UUID.randomUUID(), 2);

    GridFuture<?> fut = grid(0).hadoop().submit(jobId, createJobInfo(job.getConfiguration()));

    fut.get();

    assertEquals(lineCnt, totalLineCnt.get());

    assertEquals(34, taskWorkDirs.size());

    for (int g = 0; g < gridCount(); g++)
        grid(g).hadoop().finishFuture(jobId).get();
}

From source file:org.gridgain.grid.kernal.processors.hadoop.GridHadoopTaskExecutionSelfTest.java

License:Open Source License

/**
 * @throws Exception If failed.
 */
public void testMapperException() throws Exception {
    prepareFile("/testFile", 1000);

    Configuration cfg = new Configuration();

    cfg.setStrings("fs.ggfs.impl", GridGgfsHadoopFileSystem.class.getName());

    Job job = Job.getInstance(cfg);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(FailMapper.class);

    job.setNumReduceTasks(0);

    job.setInputFormatClass(TextInputFormat.class);

    FileInputFormat.setInputPaths(job, new Path("ggfs://:" + getTestGridName(0) + "@/"));
    FileOutputFormat.setOutputPath(job, new Path("ggfs://:" + getTestGridName(0) + "@/output/"));

    job.setJarByClass(getClass());

    final GridFuture<?> fut = grid(0).hadoop().submit(new GridHadoopJobId(UUID.randomUUID(), 3),
            createJobInfo(job.getConfiguration()));

    GridTestUtils.assertThrows(log, new Callable<Object>() {
        @Override
        public Object call() throws Exception {
            fut.get();

            return null;
        }
    }, GridException.class, null);
}