List of usage examples for org.apache.hadoop.conf.Configuration.setStrings
public void setStrings(String name, String... values)
Sets the array of String values for the name property, stored internally as a single comma-delimited value. Parameters: name is the property to set; values are the values to store.
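Before the harvested examples, a minimal round-trip sketch (standalone, with a made-up property name app.tags) showing that setStrings stores the values as one comma-delimited property and getStrings splits them back out:

import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;

public class SetStringsRoundTrip {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // stored internally as the single value "alpha,beta,gamma"
        conf.setStrings("app.tags", "alpha", "beta", "gamma");

        // getStrings splits the comma-delimited value back into an array
        String[] tags = conf.getStrings("app.tags");
        // an optional default is returned when the key is unset
        String[] fallback = conf.getStrings("app.missing", "x", "y");

        System.out.println(Arrays.toString(tags));     // [alpha, beta, gamma]
        System.out.println(Arrays.toString(fallback)); // [x, y]
    }
}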
From source file:com.github.seqware.queryengine.plugins.hbasemr.MRHBasePluginRunner.java
License:Open Source License
public MRHBasePluginRunner(MapReducePlugin mapReducePlugin, FeatureSet inputSet, Object... parameters) {
    this.mapReducePlugin = mapReducePlugin;
    try {
        CreateUpdateManager manager = SWQEFactory.getModelManager();
        // outputSet should attach to the original reference
        this.outputSet = manager.buildFeatureSet().setReferenceID(inputSet.getReferenceID()).build();
        manager.close();
        // do setup for Map/Reduce from the HBase API
        String tableName = generateTableName(inputSet);
        String destTableName = generateTableName(outputSet);
        Configuration conf = new Configuration();
        HBaseStorage.configureHBaseConfig(conf);
        HBaseConfiguration.addHbaseResources(conf);
        // we need to pass the parameters for a featureset, maybe we can take advantage of our serializers
        byte[] sSet = SWQEFactory.getSerialization().serialize(inputSet);
        byte[] dSet = SWQEFactory.getSerialization().serialize(outputSet);
        String[] str_params = serializeParametersToString(parameters, mapReducePlugin, sSet, dSet);
        File file = new File(new URI(Constants.Term.DEVELOPMENT_DEPENDENCY.getTermValue(String.class)));
        if (file.exists()) {
            conf.setStrings("tmpjars", Constants.Term.DEVELOPMENT_DEPENDENCY.getTermValue(String.class));
        }
        conf.setStrings(EXT_PARAMETERS, str_params);
        conf.set("mapreduce.map.java.opts", "-Xmx4096m -verbose:gc");
        conf.set("mapreduce.reduce.java.opts", "-Xmx4096m -verbose:gc");
        conf.set("mapreduce.map.ulimit", "4194304");
        conf.set("mapreduce.reduce.ulimit", "4194304");
        conf.set("mapreduce.map.memory.mb", "4096");
        conf.set("mapreduce.reduce.memory.mb", "4096");
        conf.set("mapreduce.map.memory.physical.mb", "4096");
        conf.set("mapreduce.reduce.memory.physical.mb", "4096");
        // the above settings all seem to be ignored by hboot
        // TODO: only this one works, but as far I know, we're using mapreduce not mapred.
        // Strange
        conf.set("mapred.child.java.opts", "-Xmx2048m -verbose:gc");
        this.job = new Job(conf, mapReducePlugin.getClass().getSimpleName());
        Scan scan = new Scan();
        scan.setMaxVersions(); // we need all version data
        scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
        scan.setCacheBlocks(false); // don't set to true for MR jobs
        byte[] qualiferBytes = Bytes.toBytes(inputSet.getSGID().getUuid().toString());
        scan.addColumn(HBaseStorage.getTEST_FAMILY_INBYTES(), qualiferBytes);
        scan.setFilter(new QualifierFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator(qualiferBytes)));
        // handle the part that changes from job to job
        // pluginInterface.performVariableInit(tableName, destTableName, scan);
        TableMapReduceUtil.initTableMapperJob(tableName, // input HBase table name
                scan, // Scan instance to control CF and attribute selection
                PluginRunnerMapper.class, // mapper
                mapReducePlugin.getMapOutputKeyClass(), // mapper output key
                mapReducePlugin.getMapOutputValueClass(), // mapper output value
                job);
        job.setOutputFormatClass(mapReducePlugin.getOutputClass()); // because we aren't emitting anything from mapper
        job.setReducerClass(MRHBasePluginRunner.PluginRunnerReducer.class); // reducer class
        job.setNumReduceTasks(mapReducePlugin.getNumReduceTasks());
        if (mapReducePlugin.getResultMechanism() == PluginInterface.ResultMechanism.FILE) {
            FileContext fileContext = FileContext.getFileContext(this.job.getConfiguration());
            Path path = new Path(
                    "/tmp/" + new BigInteger(20, new SecureRandom()).toString(32) + mapReducePlugin.toString());
            path = fileContext.makeQualified(path);
            TextOutputFormat.setOutputPath(job, path); // adjust directories as required
        }
        TableMapReduceUtil.addDependencyJars(job);
        job.setJarByClass(MRHBasePluginRunner.class);
        // submit the job, but do not block
        job.submit();
    } catch (URISyntaxException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    } catch (InterruptedException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    } catch (ClassNotFoundException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    } catch (IOException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    }
}
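The str_params written with conf.setStrings(EXT_PARAMETERS, str_params) above must be recovered on the task side. A rough sketch of that read-back, assuming a mapper setup hook and a key constant mirroring EXT_PARAMETERS; the real PluginRunnerMapper in this project does its own deserialization, so this is illustrative only:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.Text;

// Hypothetical mapper illustrating how comma-delimited values set on the driver
// side are read back from the task's Configuration.
public class ParameterReadingMapper extends TableMapper<Text, Text> {
    // assumed to match the key the driver used (EXT_PARAMETERS in MRHBasePluginRunner)
    private static final String EXT_PARAMETERS = "ext_parameters";
    private String[] externalParameters;

    @Override
    protected void setup(Context context) {
        Configuration conf = context.getConfiguration();
        // getStrings splits the comma-delimited value written by setStrings
        externalParameters = conf.getStrings(EXT_PARAMETERS);
    }

    @Override
    protected void map(ImmutableBytesWritable row, Result value, Context context) {
        // plugin-specific work would use externalParameters here
    }
}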
From source file:com.github.seqware.queryengine.plugins.runners.hbasemr.MRHBasePluginRunner.java
License:Open Source License
/**
 * @param mapReducePlugin the particular plugin to instantiate and run
 * @param reference a reference (has to be provided in lieu of a feature set)
 * @param inputSet a set of feature sets to operate on
 * @param parameters an arbitrary number of external parameters for plugin developers to provide to their plugins
 */
public MRHBasePluginRunner(MapReducePlugin mapReducePlugin, Reference reference, List<FeatureSet> inputSet,
        Object... parameters) {
    // handle null inputSet
    if (inputSet == null) {
        inputSet = new ArrayList<FeatureSet>();
    }
    // we should either have a reference or more than one input set
    assert (reference != null || inputSet.size() > 0);
    // all feature sets should have the same reference
    if (inputSet.size() > 0) {
        SGID ref = inputSet.iterator().next().getReference().getSGID();
        for (FeatureSet set : inputSet) {
            assert (set.getReferenceID().equals(ref));
        }
    }
    SGID referenceSGID = reference != null ? reference.getSGID() : inputSet.iterator().next().getReferenceID();
    this.mapReducePlugin = mapReducePlugin;
    try {
        CreateUpdateManager manager = SWQEFactory.getModelManager();
        // outputSet should attach to the original reference
        this.outputSet = manager.buildFeatureSet().setReferenceID(referenceSGID).build();
        manager.close();
        // do setup for Map/Reduce from the HBase API
        String tableName = generateTableName(outputSet);
        String destTableName = generateTableName(outputSet);
        Configuration conf = new Configuration();
        HBaseStorage.configureHBaseConfig(conf);
        HBaseConfiguration.addHbaseResources(conf);
        // we need to pass the parameters for a featureset, maybe we can take advantage of our serializers
        byte[][] sSet = new byte[inputSet.size()][]; // SWQEFactory.getSerialization().serialize(inputSet);
        for (int i = 0; i < sSet.length; i++) {
            sSet[i] = SWQEFactory.getSerialization().serialize(inputSet.get(i));
        }
        byte[] dSet = SWQEFactory.getSerialization().serialize(outputSet);
        String[] str_params = serializeParametersToString(parameters, mapReducePlugin, sSet, dSet);
        File file = new File(new URI(Constants.Term.DEVELOPMENT_DEPENDENCY.getTermValue(String.class)));
        if (file.exists()) {
            conf.setStrings("tmpjars", Constants.Term.DEVELOPMENT_DEPENDENCY.getTermValue(String.class));
        }
        conf.setStrings(EXT_PARAMETERS, str_params);
        conf.set("mapreduce.map.java.opts", "-Xmx4096m -verbose:gc");
        conf.set("mapreduce.reduce.java.opts", "-Xmx4096m -verbose:gc");
        conf.set("mapreduce.map.ulimit", "4194304");
        conf.set("mapreduce.reduce.ulimit", "4194304");
        conf.set("mapreduce.map.memory.mb", "4096");
        conf.set("mapreduce.reduce.memory.mb", "4096");
        conf.set("mapreduce.map.memory.physical.mb", "4096");
        conf.set("mapreduce.reduce.memory.physical.mb", "4096");
        conf.set("mapred.job.map.memory.mb", "4096");
        conf.set("mapred.job.reduce.memory.mb", "4096");
        // the above settings all seem to be ignored by hboot
        // TODO: only this one works, but as far I know, we're using mapreduce not mapred.
        // Strange
        conf.set("mapred.child.java.opts", "-Xmx2048m -verbose:gc");
        this.job = new Job(conf, mapReducePlugin.getClass().getSimpleName());
        Scan scan = new Scan();
        scan.setMaxVersions(); // we need all version data
        scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
        scan.setCacheBlocks(false); // don't set to true for MR jobs
        for (FeatureSet set : inputSet) {
            byte[] qualiferBytes = Bytes.toBytes(set.getSGID().getUuid().toString());
            scan.addColumn(HBaseStorage.getTEST_FAMILY_INBYTES(), qualiferBytes);
        }
        // this might be redundant, check this!!!!
        // scan.setFilter(new QualifierFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator(qualiferBytes)));
        // handle the part that changes from job to job
        // pluginInterface.performVariableInit(tableName, destTableName, scan);
        TableMapReduceUtil.initTableMapperJob(tableName, // input HBase table name
                scan, // Scan instance to control CF and attribute selection
                PluginRunnerMapper.class, // mapper
                mapReducePlugin.getMapOutputKeyClass(), // mapper output key
                mapReducePlugin.getMapOutputValueClass(), // mapper output value
                job);
        TableMapReduceUtil.initTableReducerJob(tableName, PluginRunnerReducer.class, job);
        if (mapReducePlugin.getOutputClass() != null) {
            job.setOutputFormatClass(mapReducePlugin.getOutputClass());
        }
        job.setReducerClass(MRHBasePluginRunner.PluginRunnerReducer.class); // reducer class
        if (mapReducePlugin.getResultMechanism() == PluginInterface.ResultMechanism.FILE) {
            FileContext fileContext = FileContext.getFileContext(this.job.getConfiguration());
            FileSystem fs = FileSystem.get(job.getConfiguration());
            Path path = new Path(fs.getHomeDirectory(),
                    new BigInteger(20, new SecureRandom()).toString(32) + mapReducePlugin.toString());
            path = fileContext.makeQualified(path);
            TextOutputFormat.setOutputPath(job, path); // adjust directories as required
        }
        job.setJarByClass(MRHBasePluginRunner.class);
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.addDependencyJars(conf, MRHBasePluginRunner.class,
                MRHBasePluginRunner.PluginRunnerMapper.class, MRHBasePluginRunner.PluginRunnerReducer.class);
        // submit the job, but do not block
        job.submit();
    } catch (URISyntaxException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    } catch (InterruptedException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    } catch (ClassNotFoundException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    } catch (IOException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    }
}
From source file:com.github.seqware.queryengine.plugins.runners.inmemory.InMemoryPluginRunner.java
License:Open Source License
public InMemoryPluginRunner(PluginInterface pluginInterface, Reference reference, List<FeatureSet> inputSet,
        Object[] parameters) {
    if (reference != null) {
        throw new UnsupportedOperationException();
    }
    this.pluginInterface = pluginInterface;
    CreateUpdateManager manager = SWQEFactory.getModelManager();
    // outputSet should attach to the original reference
    FeatureSet outputSet = manager.buildFeatureSet().setReferenceID(inputSet.iterator().next().getReferenceID())
            .build();
    manager.close();
    byte[][] sSet = new byte[inputSet.size()][]; // SWQEFactory.getSerialization().serialize(inputSet);
    for (int i = 0; i < sSet.length; i++) {
        sSet[i] = SWQEFactory.getSerialization().serialize(inputSet.get(i));
    }
    byte[] dSet = SWQEFactory.getSerialization().serialize(outputSet);
    // pretend to serialize parameters
    this.serializedParameters = MRHBasePluginRunner.serializeParametersToString(parameters, pluginInterface,
            sSet, dSet);
    // pretend to de-serialize
    Configuration conf = new Configuration();
    String[] str_params = serializeParametersToString(parameters, pluginInterface, sSet, dSet);
    conf.setStrings(EXT_PARAMETERS, str_params);
    Job job = null;
    try {
        job = new Job(conf);
    } catch (IOException ex) {
        Rethrow.rethrow(ex);
    }
    Class plugin = MRHBasePluginRunner.transferConfiguration(job, this);
    // this is not currently asynchronous
    if (pluginInterface instanceof MapReducePlugin) {
        MapReducePlugin mrPlugin = null;
        try {
            mrPlugin = (MapReducePlugin) plugin.newInstance();
        } catch (InstantiationException ex) {
            Rethrow.rethrow(ex);
        } catch (IllegalAccessException ex) {
            Rethrow.rethrow(ex);
        }
        mrPlugin.mapInit(this);
        Map<Long, Map<FeatureSet, Collection<Feature>>> map = new HashMap<Long, Map<FeatureSet, Collection<Feature>>>();
        for (FeatureSet set : inputSet) {
            for (Feature feature : set) {
                for (long i = feature.getStart(); i <= feature.getStop(); i++) {
                    if (!map.containsKey(i)) {
                        map.put(i, new HashMap<FeatureSet, Collection<Feature>>());
                    }
                    if (!map.get(i).containsKey(set)) {
                        map.get(i).put(set, new ArrayList<Feature>());
                    }
                    map.get(i).get(set).add(feature);
                }
            }
        }
        // mimic filtering
        for (Entry<Long, Map<FeatureSet, Collection<Feature>>> e : map.entrySet()) {
            Map<FeatureSet, Collection<Feature>> innerMap = MRHBasePluginRunner
                    .handlePreFilteredPlugins(e.getValue(), mrPlugin, this.ext_parameters);
            // not sure what to do for position here
            mrPlugin.map(e.getKey(), innerMap, this);
            mrPlugin.mapCleanup();
        }
        mrPlugin.reduceInit();
        // TODO: make this pass through functional in order to simulate MapReduce
        for (Feature f : inputSet.iterator().next()) {
            mrPlugin.reduce(null, null, this);
        }
        mrPlugin.reduceCleanup();
        mrPlugin.cleanup();
    } else {
        throw new UnsupportedOperationException("Scan plugins not supported yet");
    }
}
From source file:com.google.cloud.bigtable.mapreduce.Import.java
License:Open Source License
/**
 * Add a Filter to be instantiated on import
 * @param conf Configuration to update (will be passed to the job)
 * @param clazz {@link Filter} subclass to instantiate on the server.
 * @param filterArgs List of arguments to pass to the filter on instantiation
 */
public static void addFilterAndArguments(Configuration conf, Class<? extends Filter> clazz,
        List<String> filterArgs) {
    conf.set(Import.FILTER_CLASS_CONF_KEY, clazz.getName());
    conf.setStrings(Import.FILTER_ARGS_CONF_KEY, filterArgs.toArray(new String[filterArgs.size()]));
}
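On the job side, the class name and argument list written above can be recovered from the same Configuration. A hedged sketch of that read-back, assuming the two key constants are accessible; note that because setStrings stores the list comma-delimited, an individual argument containing a comma would be split apart by getStrings:

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import com.google.cloud.bigtable.mapreduce.Import;

// Illustrative read-back of the filter configuration written by addFilterAndArguments.
public class FilterConfigReader {
    public static void readBack(Configuration conf) throws ClassNotFoundException {
        String filterClassName = conf.get(Import.FILTER_CLASS_CONF_KEY);
        // getStrings splits the comma-delimited value back into the original argument list
        String[] args = conf.getStrings(Import.FILTER_ARGS_CONF_KEY, new String[0]);
        List<String> filterArgs = Arrays.asList(args);
        Class<?> filterClass = Class.forName(filterClassName);
        System.out.println(filterClass.getName() + " with args " + filterArgs);
    }
}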
From source file:com.hortonworks.minicluster.MiniHadoopCluster.java
License:Apache License
@Override
public void serviceInit(Configuration conf) throws Exception {
    conf.setBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, true);
    conf.setStrings(YarnConfiguration.NM_AUX_SERVICES,
            new String[] { ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID });
    conf.setClass(
            String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID),
            ShuffleHandler.class, Service.class);
    conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0);
    this.addService(new ResourceManagerWrapper());
    for (int index = 0; index < this.nodeManagers.length; index++) {
        this.nodeManagers[index] = new ShortCircuitedNodeManager();
        this.addService(new NodeManagerWrapper(index));
    }
    super.serviceInit(conf instanceof YarnConfiguration ? conf : new YarnConfiguration(conf));
}
From source file:com.ibm.stocator.fs.commom.unittests.StocatorPathTest.java
License:Open Source License
@Before
public final void before() {
    mStocatorPath = PowerMockito.mock(StocatorPath.class);
    Whitebox.setInternalState(mStocatorPath, "tempIdentifiers", new String[] { pattern1 });
    Configuration conf = new Configuration();
    conf.setStrings("fs.stocator.temp.identifier", pattern1);
    stocPath = new StocatorPath(DEFAULT_FOUTPUTCOMMITTER_V1, conf, hostname);
}
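The test passes a single pattern, but the property is read as a list, so several temp identifiers could be registered in one call. A small hedged sketch; "patternA" and "patternB" are placeholders standing in for the pattern1 value used in the test:

import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;

public class TempIdentifierExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // several identifiers can be supplied at once; they are stored comma-delimited
        conf.setStrings("fs.stocator.temp.identifier", "patternA", "patternB");

        // consumers read them back as an array, optionally with a default
        String[] identifiers = conf.getStrings("fs.stocator.temp.identifier", "patternA");
        System.out.println(Arrays.toString(identifiers));
    }
}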
From source file:com.ikanow.infinit.e.data_model.custom.InfiniteMongoConfigUtil.java
License:Apache License
public static void setSourceTags(Configuration conf, DBObject tags) {
    conf.setStrings(SOURCE_TAGS, tags.toString());
}
From source file:com.ikanow.infinit.e.data_model.custom.InfiniteMongoConfigUtil.java
License:Apache License
public static void setCacheList(Configuration conf, BasicDBList cacheList) {
    conf.setStrings(CACHE_LIST, cacheList.toString());
}
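Both helpers pass a single, already-serialized value, so setStrings here behaves like a plain set. One caveat worth noting: a DBObject rendered to JSON usually contains commas, so reading the property back with getStrings would split it into fragments; the matching getter has to use get. A hedged sketch (the key constant is stood in by a literal, and the JSON is made up):

import org.apache.hadoop.conf.Configuration;

public class SingleValueCaveat {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        String sourceTagsJson = "{ \"tags\" : [ \"health\" , \"news\" ] }"; // stand-in for tags.toString()
        conf.setStrings("source.tags", sourceTagsJson); // single value: equivalent to conf.set(...)

        // get returns the value intact ...
        String intact = conf.get("source.tags");
        // ... while getStrings would split on the commas inside the JSON
        String[] fragments = conf.getStrings("source.tags");

        System.out.println(intact);
        System.out.println(fragments.length + " fragments");
    }
}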
From source file:com.kenshoo.integrations.plugins.connectors.GCSFileProtocol.java
License:Apache License
@Override
public void setupAuthentication(GenericConfiguration genericConfiguration, Configuration configuration,
        String arg1, String arg2) {
    configuration.setBoolean("fs.gcsfs.impl.disable.cache", true);
    configuration.setStrings("fs.gcsfs.impl", "com.kenshoo.integrations.plugins.connectors.GCSFileSystem");
    String accessToken = genericConfiguration.getStringProperty(PROPERTY_KEY_ACCESS_TOKEN, null);
    if (accessToken != null) {
        configuration.setStrings(PROPERTY_KEY_ACCESS_TOKEN, accessToken);
    }
    String refreshToken = genericConfiguration.getStringProperty(PROPERTY_KEY_REFRESH_TOKEN, null);
    if (refreshToken != null) {
        configuration.setStrings(PROPERTY_KEY_REFRESH_TOKEN, refreshToken);
    }
    Long accessTokenCreationTime = genericConfiguration.getLongProperty(PROPERTY_KEY_ACCESS_TOKEN_CREATION_TIME,
            null);
    if (accessTokenCreationTime != null) {
        configuration.setLong(PROPERTY_KEY_ACCESS_TOKEN_CREATION_TIME, accessTokenCreationTime);
    }
    Long accessTokenExpirationAfter = genericConfiguration
            .getLongProperty(PROPERTY_KEY_ACCESS_TOKEN_EXPIRES_AFTER, null);
    if (accessTokenExpirationAfter != null) {
        configuration.setLong(PROPERTY_KEY_ACCESS_TOKEN_EXPIRES_AFTER, accessTokenExpirationAfter);
    }
    String relayURL = genericConfiguration.getStringProperty(PROPERTY_KEY_OAUTH_RELAY_URL, null);
    if (relayURL != null) {
        configuration.setStrings(PROPERTY_KEY_OAUTH_RELAY_URL, relayURL);
    }
    String clientId = genericConfiguration.getStringProperty(PROPERTY_KEY_OAUTH_CLIENT_ID, null);
    if (clientId != null) {
        configuration.setStrings(PROPERTY_KEY_OAUTH_CLIENT_ID, clientId);
    }
    String secret = genericConfiguration.getStringProperty(PROPERTY_KEY_OAUTH_SECRET, null);
    if (secret != null) {
        configuration.setStrings(PROPERTY_KEY_OAUTH_SECRET, secret);
    }
}
From source file:com.lakala.hbase.IndexBuilder2.java
License:Apache License
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    String columnFamily = args[1];
    System.out.println("****" + tableName);
    conf.set(TableInputFormat.SCAN, convertScanToString(new Scan()));
    conf.set(TableInputFormat.INPUT_TABLE, tableName);
    conf.set("index.tablename", tableName);
    conf.set("index.familyname", columnFamily);
    String[] fields = new String[args.length - 2];
    for (int i = 0; i < fields.length; i++) {
        fields[i] = args[i + 2];
    }
    conf.setStrings("index.fields", fields);
    conf.set("index.familyname", "attributes");
    Job job = new Job(conf, tableName);
    job.setJarByClass(IndexBuilder.class);
    job.setMapperClass(Map.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TableInputFormat.class);
    job.setOutputFormatClass(MultiTableOutputFormat.class);
    return job;
}
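The field names written with conf.setStrings("index.fields", fields) are meant to be consumed inside the mapper. A rough sketch of that setup-side read, assuming a TableMapper similar to the Map class registered above; the real IndexBuilder mapper is not shown in the snippet, so this is illustrative only:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Writable;

// Illustrative mapper showing how the driver-side setStrings/set calls are read back.
public class IndexFieldMapper extends TableMapper<ImmutableBytesWritable, Writable> {
    private byte[] family;
    private String[] fields;

    @Override
    protected void setup(Context context) {
        Configuration conf = context.getConfiguration();
        family = Bytes.toBytes(conf.get("index.familyname"));
        // getStrings reverses the comma-delimited encoding produced by setStrings
        fields = conf.getStrings("index.fields");
    }

    @Override
    protected void map(ImmutableBytesWritable rowKey, Result result, Context context) {
        // for each configured field, the real job would emit an index row here
    }
}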