List of usage examples for org.apache.hadoop.conf.Configuration setStrings
public void setStrings(String name, String... values)
Sets the array of string values for the name property, stored as comma-delimited values.
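A minimal standalone sketch of the call, not taken from any of the source files below; the property name "my.csv.list" and the values are invented for illustration. setStrings joins the varargs into one comma-delimited property value, and getStrings splits it back into an array. Because the values appear to be joined with plain commas, a value that itself contains a comma may not round-trip cleanly.

import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;

public class SetStringsSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Store three values under one (hypothetical) property name;
        // they are kept internally as a single comma-delimited string.
        conf.setStrings("my.csv.list", "alpha", "beta", "gamma");

        // Raw property value, expected to be "alpha,beta,gamma".
        System.out.println(conf.get("my.csv.list"));

        // getStrings splits the comma-delimited value back into a String[].
        System.out.println(Arrays.toString(conf.getStrings("my.csv.list")));
    }
}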
From source file:org.bgi.flexlab.gaea.tools.mapreduce.markduplicate.MarkDuplicateOptions.java
License:Open Source License
@Override
public void setHadoopConf(String[] args, Configuration conf) {
    conf.setStrings("args", args);
    Path p = new Path(this.getInput());
    try {
        fs = p.getFileSystem(conf);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    traversalInputPath(this.getInput());
}
From source file:org.bgi.flexlab.gaea.tools.mapreduce.vcf.sort.VCFSortOptions.java
License:Open Source License
@Override
public void setHadoopConf(String[] args, Configuration conf) {
    try {
        String[] otherArgs = new GenericOptionsParser(args).getRemainingArgs();
        conf.setStrings("args", otherArgs);
        conf.set(VCFOutputFormat.OUTPUT_VCF_FORMAT_PROPERTY, "VCF");
        conf.set(GaeaVCFHeader.VCF_HEADER_PROPERTY, setOutputURI("vcfHeader.obj"));
        conf.set(VCFRecordReader.CHR_ORDER_PROPERTY, setOutputURI("chrOrder.obj"));
        conf.setBoolean(KeyIgnoringVCFOutputFormat.WRITE_HEADER_PROPERTY, false);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
From source file:org.bgi.flexlab.gaea.tools.mapreduce.vcfqualitycontrol.VCFQualityControlOptions.java
License:Open Source License
@Override
public void setHadoopConf(String[] args, Configuration conf) {
    try {
        String[] otherArgs = new GenericOptionsParser(args).getRemainingArgs();
        conf.setStrings("args", otherArgs);
        conf.set(VCFOutputFormat.OUTPUT_VCF_FORMAT_PROPERTY, "VCF");
        conf.set(GaeaVCFHeader.VCF_HEADER_PROPERTY, setOutputURI("vcfHeader.obj"));
        conf.setBoolean(KeyIgnoringVCFOutputFormat.WRITE_HEADER_PROPERTY, false);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
From source file:org.calrissian.accumulorecipes.commons.hadoop.BaseQfdInputFormat.java
License:Apache License
/**
 * Sets selection fields on the current configuration.
 */
public static void setSelectFields(Configuration config, Set<String> selectFields) {
    if (selectFields != null)
        config.setStrings(SELECT_FIELDS, selectFields.toArray(new String[] {}));
}
From source file:org.cloudgraph.hbase.mapreduce.GraphMapReduceSetup.java
License:Apache License
/**
 * Use this before submitting a graph map job. It will appropriately set up
 * the job.
 *
 * @param query
 *          The query defining the {@link GraphMapper} input graphs.
 * @param mapper
 *          The mapper class to use.
 * @param outputKeyClass
 *          The class of the output key.
 * @param outputValueClass
 *          The class of the output value.
 * @param job
 *          The current job to adjust. Make sure the passed job is carrying
 *          all necessary HBase configuration.
 * @param addDependencyJars
 *          upload HBase jars and jars for any of the configured job classes
 *          via the distributed cache (tmpjars).
 * @throws IOException
 *           When setting up the details fails.
 */
public static void setupGraphMapperJob(Query query, Class<? extends GraphMapper> mapper,
        Class<? extends WritableComparable> outputKeyClass, Class<? extends Writable> outputValueClass,
        Job job, boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass) throws IOException {
    job.setInputFormatClass(inputFormatClass);
    if (outputValueClass != null)
        job.setMapOutputValueClass(outputValueClass);
    if (outputKeyClass != null)
        job.setMapOutputKeyClass(outputKeyClass);
    job.setMapperClass(mapper);

    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));

    PlasmaType type = getRootType(query);
    Where where = query.getModel().findWhereClause();
    SelectionCollector selectionCollector = null;
    if (where != null)
        selectionCollector = new SelectionCollector(query.getModel().getSelectClause(), where, type);
    else
        selectionCollector = new SelectionCollector(query.getModel().getSelectClause(), type);
    selectionCollector.setOnlyDeclaredProperties(false);
    // FIXME: generalize collectRowKeyProperties
    for (Type t : selectionCollector.getTypes())
        collectRowKeyProperties(selectionCollector, (PlasmaType) t);

    // FIXME: just need the root table reader - remove
    DistributedGraphReader graphReader = new DistributedGraphReader(type, selectionCollector.getTypes(), null);

    HBaseFilterAssembler columnFilterAssembler = new GraphFetchColumnFilterAssembler(selectionCollector, type);
    Filter columnFilter = columnFilterAssembler.getFilter();

    From from = query.getModel().getFromClause();
    List<Scan> scans = createScans(from, where, type, columnFilter, conf);

    conf.set(GraphInputFormat.QUERY, marshal(query));
    conf.set(GraphInputFormat.ROOT_TABLE, graphReader.getRootTableReader().getTableName());

    List<String> scanStrings = new ArrayList<String>();
    for (Scan scan : scans) {
        scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME,
                Bytes.toBytes(graphReader.getRootTableReader().getTableName()));
        scanStrings.add(convertScanToString(scan));
    }
    conf.setStrings(GraphInputFormat.SCANS, scanStrings.toArray(new String[scanStrings.size()]));

    if (addDependencyJars) {
        addDependencyJars(job);
    }
    initCredentials(job);
}
From source file:org.elasticsearch.hadoop.mr.MultiOutputFormat.java
License:Apache License
public static void addOutputFormat(Configuration cfg, Class<? extends OutputFormat>... formats) {
    Collection<String> of = cfg.getStringCollection(CFG_FIELD);
    for (Class<? extends OutputFormat> format : formats) {
        of.add(format.getName());
    }
    cfg.setStrings(CFG_FIELD, StringUtils.join(of, ","));
}
From source file:org.gridgain.grid.ggfs.GridGgfsHadoopFileSystemLoggerStateSelfTest.java
License:Open Source License
/**
 * Instantiate new file system.
 *
 * @return New file system.
 * @throws Exception If failed.
 */
private GridGgfsHadoopFileSystem fileSystem() throws Exception {
    Configuration fsCfg = new Configuration();

    fsCfg.addResource(U.resolveGridGainUrl("modules/core/src/test/config/hadoop/core-site-loopback.xml"));

    fsCfg.setBoolean("fs.ggfs.impl.disable.cache", true);

    if (logging)
        fsCfg.setBoolean(String.format(PARAM_GGFS_LOG_ENABLED, "ggfs:ggfs-grid@"), logging);

    fsCfg.setStrings(String.format(PARAM_GGFS_LOG_DIR, "ggfs:ggfs-grid@"), U.getGridGainHome());

    return (GridGgfsHadoopFileSystem) FileSystem.get(new URI("ggfs://ggfs:ggfs-grid@/"), fsCfg);
}
From source file:org.gridgain.grid.kernal.processors.hadoop.GridHadoopTaskExecutionSelfTest.java
License:Open Source License
/**
 * @throws Exception If failed.
 */
public void testMapRun() throws Exception {
    int lineCnt = 10000;
    String fileName = "/testFile";

    prepareFile(fileName, lineCnt);

    totalLineCnt.set(0);
    taskWorkDirs.clear();

    Configuration cfg = new Configuration();

    cfg.setStrings("fs.ggfs.impl", GridGgfsHadoopFileSystem.class.getName());

    Job job = Job.getInstance(cfg);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(TestMapper.class);

    job.setNumReduceTasks(0);

    job.setInputFormatClass(TextInputFormat.class);

    FileInputFormat.setInputPaths(job, new Path("ggfs://:" + getTestGridName(0) + "@/"));
    FileOutputFormat.setOutputPath(job, new Path("ggfs://:" + getTestGridName(0) + "@/output/"));

    job.setJarByClass(getClass());

    GridFuture<?> fut = grid(0).hadoop().submit(new GridHadoopJobId(UUID.randomUUID(), 1),
            createJobInfo(job.getConfiguration()));

    fut.get();

    assertEquals(lineCnt, totalLineCnt.get());
    assertEquals(32, taskWorkDirs.size());
}
From source file:org.gridgain.grid.kernal.processors.hadoop.GridHadoopTaskExecutionSelfTest.java
License:Open Source License
/**
 * @throws Exception If failed.
 */
public void testMapCombineRun() throws Exception {
    int lineCnt = 10001;
    String fileName = "/testFile";

    prepareFile(fileName, lineCnt);

    totalLineCnt.set(0);
    taskWorkDirs.clear();

    Configuration cfg = new Configuration();

    cfg.setStrings("fs.ggfs.impl", GridGgfsHadoopFileSystem.class.getName());
    cfg.setBoolean(MAP_WRITE, true);

    Job job = Job.getInstance(cfg);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(TestMapper.class);
    job.setCombinerClass(TestCombiner.class);
    job.setReducerClass(TestReducer.class);

    job.setNumReduceTasks(2);

    job.setInputFormatClass(TextInputFormat.class);

    FileInputFormat.setInputPaths(job, new Path("ggfs://:" + getTestGridName(0) + "@/"));
    FileOutputFormat.setOutputPath(job, new Path("ggfs://:" + getTestGridName(0) + "@/output"));

    job.setJarByClass(getClass());

    GridHadoopJobId jobId = new GridHadoopJobId(UUID.randomUUID(), 2);

    GridFuture<?> fut = grid(0).hadoop().submit(jobId, createJobInfo(job.getConfiguration()));

    fut.get();

    assertEquals(lineCnt, totalLineCnt.get());
    assertEquals(34, taskWorkDirs.size());

    for (int g = 0; g < gridCount(); g++)
        grid(g).hadoop().finishFuture(jobId).get();
}
From source file:org.gridgain.grid.kernal.processors.hadoop.GridHadoopTaskExecutionSelfTest.java
License:Open Source License
/**
 * @throws Exception If failed.
 */
public void testMapperException() throws Exception {
    prepareFile("/testFile", 1000);

    Configuration cfg = new Configuration();

    cfg.setStrings("fs.ggfs.impl", GridGgfsHadoopFileSystem.class.getName());

    Job job = Job.getInstance(cfg);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(FailMapper.class);

    job.setNumReduceTasks(0);

    job.setInputFormatClass(TextInputFormat.class);

    FileInputFormat.setInputPaths(job, new Path("ggfs://:" + getTestGridName(0) + "@/"));
    FileOutputFormat.setOutputPath(job, new Path("ggfs://:" + getTestGridName(0) + "@/output/"));

    job.setJarByClass(getClass());

    final GridFuture<?> fut = grid(0).hadoop().submit(new GridHadoopJobId(UUID.randomUUID(), 3),
            createJobInfo(job.getConfiguration()));

    GridTestUtils.assertThrows(log, new Callable<Object>() {
        @Override public Object call() throws Exception {
            fut.get();

            return null;
        }
    }, GridException.class, null);
}