Example usage for org.apache.hadoop.conf Configuration setStrings

List of usage examples for org.apache.hadoop.conf Configuration setStrings

Introduction

On this page you can find example usage for org.apache.hadoop.conf Configuration setStrings.

Prototype

public void setStrings(String name, String... values) 

Document

Set the array of string values for the name property as comma-delimited values.
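
Before the project-specific examples below, here is a minimal self-contained sketch of the call itself; the property name example.hosts is made up for illustration. setStrings joins its values with commas, and getStrings splits the stored value back into an array:

import org.apache.hadoop.conf.Configuration;

public class SetStringsExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // store three values under a hypothetical property name
        conf.setStrings("example.hosts", "node1.example.org", "node2.example.org", "node3.example.org");

        // the property is stored as one comma-delimited string
        System.out.println(conf.get("example.hosts"));

        // getStrings splits it back into an array
        for (String host : conf.getStrings("example.hosts")) {
            System.out.println(host);
        }
    }
}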

Usage

From source file:com.github.seqware.queryengine.plugins.hbasemr.MRHBasePluginRunner.java

License:Open Source License

public MRHBasePluginRunner(MapReducePlugin mapReducePlugin, FeatureSet inputSet, Object... parameters) {
    this.mapReducePlugin = mapReducePlugin;
    try {
        CreateUpdateManager manager = SWQEFactory.getModelManager();
        //outputSet should attach to the original reference
        this.outputSet = manager.buildFeatureSet().setReferenceID(inputSet.getReferenceID()).build();
        manager.close();

        // do setup for Map/Reduce from the HBase API
        String tableName = generateTableName(inputSet);
        String destTableName = generateTableName(outputSet);

        Configuration conf = new Configuration();
        HBaseStorage.configureHBaseConfig(conf);
        HBaseConfiguration.addHbaseResources(conf);

        // we need to pass the parameters for a featureset, maybe we can take advantage of our serializers
        byte[] sSet = SWQEFactory.getSerialization().serialize(inputSet);
        byte[] dSet = SWQEFactory.getSerialization().serialize(outputSet);

        String[] str_params = serializeParametersToString(parameters, mapReducePlugin, sSet, dSet);

        File file = new File(new URI(Constants.Term.DEVELOPMENT_DEPENDENCY.getTermValue(String.class)));
        if (file.exists()) {
            conf.setStrings("tmpjars", Constants.Term.DEVELOPMENT_DEPENDENCY.getTermValue(String.class));
        }
        conf.setStrings(EXT_PARAMETERS, str_params);
        conf.set("mapreduce.map.java.opts", "-Xmx4096m  -verbose:gc");
        conf.set("mapreduce.reduce.java.opts", "-Xmx4096m  -verbose:gc");
        conf.set("mapreduce.map.ulimit", "4194304");
        conf.set("mapreduce.reduce.ulimit", "4194304");
        conf.set("mapreduce.map.memory.mb", "4096");
        conf.set("mapreduce.reduce.memory.mb", "4096");
        conf.set("mapreduce.map.memory.physical.mb", "4096");
        conf.set("mapreduce.reduce.memory.physical.mb", "4096");
        // the above settings all seem to be ignored by hboot
        // TODO: only this one works, but as far as I know, we're using mapreduce not mapred.
        // Strange
        conf.set("mapred.child.java.opts", "-Xmx2048m -verbose:gc");

        this.job = new Job(conf, mapReducePlugin.getClass().getSimpleName());

        Scan scan = new Scan();
        scan.setMaxVersions(); // we need all version data
        scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
        scan.setCacheBlocks(false); // don't set to true for MR jobs
        byte[] qualiferBytes = Bytes.toBytes(inputSet.getSGID().getUuid().toString());
        scan.addColumn(HBaseStorage.getTEST_FAMILY_INBYTES(), qualiferBytes);
        scan.setFilter(new QualifierFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator(qualiferBytes)));

        // handle the part that changes from job to job
        // pluginInterface.performVariableInit(tableName, destTableName, scan);
        TableMapReduceUtil.initTableMapperJob(tableName, // input HBase table name
                scan, // Scan instance to control CF and attribute selection
                PluginRunnerMapper.class, // mapper
                mapReducePlugin.getMapOutputKeyClass(), // mapper output key 
                mapReducePlugin.getMapOutputValueClass(), // mapper output value
                job);
        job.setOutputFormatClass(mapReducePlugin.getOutputClass()); // because we aren't emitting anything from mapper
        job.setReducerClass(MRHBasePluginRunner.PluginRunnerReducer.class); // reducer class
        job.setNumReduceTasks(mapReducePlugin.getNumReduceTasks());

        if (mapReducePlugin.getResultMechanism() == PluginInterface.ResultMechanism.FILE) {
            FileContext fileContext = FileContext.getFileContext(this.job.getConfiguration());
            Path path = new Path(
                    "/tmp/" + new BigInteger(20, new SecureRandom()).toString(32) + mapReducePlugin.toString());
            path = fileContext.makeQualified(path);
            TextOutputFormat.setOutputPath(job, path); // adjust directories as required
        }

        TableMapReduceUtil.addDependencyJars(job);
        job.setJarByClass(MRHBasePluginRunner.class);
        // submit the job, but do not block
        job.submit();
    } catch (URISyntaxException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    } catch (InterruptedException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    } catch (ClassNotFoundException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    } catch (IOException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    }
}
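
The runner above passes the serialized plugin parameters to the tasks with conf.setStrings(EXT_PARAMETERS, str_params). A minimal sketch of how the mapper side could recover them with getStrings follows; the class, field names, and the ext_parameters key value are assumptions for illustration, not the project's actual mapper:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.Text;

// Hypothetical read-back of the values stored with setStrings(EXT_PARAMETERS, str_params).
public class ExampleParameterMapper extends TableMapper<Text, Text> {

    static final String EXT_PARAMETERS = "ext_parameters"; // assumed key for this sketch

    private String[] extParams;

    @Override
    protected void setup(Context context) {
        // getStrings splits the comma-delimited value written by setStrings back into an array
        Configuration conf = context.getConfiguration();
        extParams = conf.getStrings(EXT_PARAMETERS);
    }

    @Override
    protected void map(ImmutableBytesWritable row, Result value, Context context) {
        // a real mapper would deserialize extParams here and apply the plugin logic
    }
}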

From source file:com.github.seqware.queryengine.plugins.runners.hbasemr.MRHBasePluginRunner.java

License:Open Source License

/**
 * @param mapReducePlugin the particular plugin to instantiate and run
 * @param reference a reference (has to be provided in lieu of a feature set) 
 * @param inputSet a set of feature sets to operate on
 * @param parameters an arbitrary number of external parameters for plugin developers to provide to their plugins
 */
public MRHBasePluginRunner(MapReducePlugin mapReducePlugin, Reference reference, List<FeatureSet> inputSet,
        Object... parameters) {
    // handle null inputSet
    if (inputSet == null) {
        inputSet = new ArrayList<FeatureSet>();
    }
    // we should either have a reference or more than one input set
    assert (reference != null || inputSet.size() > 0);
    // all feature sets should have the same reference
    if (inputSet.size() > 0) {
        SGID ref = inputSet.iterator().next().getReference().getSGID();
        for (FeatureSet set : inputSet) {
            assert (set.getReferenceID().equals(ref));
        }
    }

    SGID referenceSGID = reference != null ? reference.getSGID() : inputSet.iterator().next().getReferenceID();

    this.mapReducePlugin = mapReducePlugin;
    try {
        CreateUpdateManager manager = SWQEFactory.getModelManager();
        //outputSet should attach to the original reference
        this.outputSet = manager.buildFeatureSet().setReferenceID(referenceSGID).build();
        manager.close();

        // do setup for Map/Reduce from the HBase API
        String tableName = generateTableName(outputSet);
        String destTableName = generateTableName(outputSet);

        Configuration conf = new Configuration();
        HBaseStorage.configureHBaseConfig(conf);
        HBaseConfiguration.addHbaseResources(conf);

        // we need to pass the parameters for a featureset, maybe we can take advantage of our serializers
        byte[][] sSet = new byte[inputSet.size()][];//SWQEFactory.getSerialization().serialize(inputSet);
        for (int i = 0; i < sSet.length; i++) {
            sSet[i] = SWQEFactory.getSerialization().serialize(inputSet.get(i));
        }
        byte[] dSet = SWQEFactory.getSerialization().serialize(outputSet);

        String[] str_params = serializeParametersToString(parameters, mapReducePlugin, sSet, dSet);

        File file = new File(new URI(Constants.Term.DEVELOPMENT_DEPENDENCY.getTermValue(String.class)));
        if (file.exists()) {
            conf.setStrings("tmpjars", Constants.Term.DEVELOPMENT_DEPENDENCY.getTermValue(String.class));
        }
        conf.setStrings(EXT_PARAMETERS, str_params);
        conf.set("mapreduce.map.java.opts", "-Xmx4096m  -verbose:gc");
        conf.set("mapreduce.reduce.java.opts", "-Xmx4096m  -verbose:gc");
        conf.set("mapreduce.map.ulimit", "4194304");
        conf.set("mapreduce.reduce.ulimit", "4194304");
        conf.set("mapreduce.map.memory.mb", "4096");
        conf.set("mapreduce.reduce.memory.mb", "4096");
        conf.set("mapreduce.map.memory.physical.mb", "4096");
        conf.set("mapreduce.reduce.memory.physical.mb", "4096");

        conf.set("mapred.job.map.memory.mb", "4096");
        conf.set("mapred.job.reduce.memory.mb", "4096");

        // the above settings all seem to be ignored by hboot
        // TODO: only this one works, but as far as I know, we're using mapreduce not mapred.
        // Strange
        conf.set("mapred.child.java.opts", "-Xmx2048m -verbose:gc");

        this.job = new Job(conf, mapReducePlugin.getClass().getSimpleName());

        Scan scan = new Scan();
        scan.setMaxVersions(); // we need all version data
        scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
        scan.setCacheBlocks(false); // don't set to true for MR jobs
        for (FeatureSet set : inputSet) {
            byte[] qualiferBytes = Bytes.toBytes(set.getSGID().getUuid().toString());
            scan.addColumn(HBaseStorage.getTEST_FAMILY_INBYTES(), qualiferBytes);
        }
        // this might be redundant, check this!!!! 
        // scan.setFilter(new QualifierFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator(qualiferBytes)));

        // handle the part that changes from job to job
        // pluginInterface.performVariableInit(tableName, destTableName, scan);
        TableMapReduceUtil.initTableMapperJob(tableName, // input HBase table name
                scan, // Scan instance to control CF and attribute selection
                PluginRunnerMapper.class, // mapper
                mapReducePlugin.getMapOutputKeyClass(), // mapper output key 
                mapReducePlugin.getMapOutputValueClass(), // mapper output value
                job);
        TableMapReduceUtil.initTableReducerJob(tableName, PluginRunnerReducer.class, job);

        if (mapReducePlugin.getOutputClass() != null) {
            job.setOutputFormatClass(mapReducePlugin.getOutputClass());
        }
        job.setReducerClass(MRHBasePluginRunner.PluginRunnerReducer.class); // reducer class

        if (mapReducePlugin.getResultMechanism() == PluginInterface.ResultMechanism.FILE) {
            FileContext fileContext = FileContext.getFileContext(this.job.getConfiguration());
            FileSystem fs = FileSystem.get(job.getConfiguration());
            Path path = new Path(fs.getHomeDirectory(),
                    new BigInteger(20, new SecureRandom()).toString(32) + mapReducePlugin.toString());
            path = fileContext.makeQualified(path);
            TextOutputFormat.setOutputPath(job, path); // adjust directories as required
        }

        job.setJarByClass(MRHBasePluginRunner.class);
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.addDependencyJars(conf, MRHBasePluginRunner.class,
                MRHBasePluginRunner.PluginRunnerMapper.class, MRHBasePluginRunner.PluginRunnerReducer.class);
        // submit the job, but do not block
        job.submit();
    } catch (URISyntaxException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    } catch (InterruptedException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    } catch (ClassNotFoundException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    } catch (IOException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    }
}

From source file:com.github.seqware.queryengine.plugins.runners.inmemory.InMemoryPluginRunner.java

License:Open Source License

public InMemoryPluginRunner(PluginInterface pluginInterface, Reference reference, List<FeatureSet> inputSet,
        Object[] parameters) {
    if (reference != null) {
        throw new UnsupportedOperationException();
    }

    this.pluginInterface = pluginInterface;
    CreateUpdateManager manager = SWQEFactory.getModelManager();
    //outputSet should attach to the original reference
    FeatureSet outputSet = manager.buildFeatureSet().setReferenceID(inputSet.iterator().next().getReferenceID())
            .build();
    manager.close();

    byte[][] sSet = new byte[inputSet.size()][];//SWQEFactory.getSerialization().serialize(inputSet);
    for (int i = 0; i < sSet.length; i++) {
        sSet[i] = SWQEFactory.getSerialization().serialize(inputSet.get(i));
    }
    byte[] dSet = SWQEFactory.getSerialization().serialize(outputSet);
    // pretend to serialize parameters 
    this.serializedParameters = MRHBasePluginRunner.serializeParametersToString(parameters, pluginInterface,
            sSet, dSet);
    // pretend to de-serialize 
    Configuration conf = new Configuration();
    String[] str_params = serializeParametersToString(parameters, pluginInterface, sSet, dSet);
    conf.setStrings(EXT_PARAMETERS, str_params);
    Job job = null;
    try {
        job = new Job(conf);
    } catch (IOException ex) {
        Rethrow.rethrow(ex);
    }
    Class plugin = MRHBasePluginRunner.transferConfiguration(job, this);

    // this is not currently asynchronous
    if (pluginInterface instanceof MapReducePlugin) {
        MapReducePlugin mrPlugin = null;
        try {
            mrPlugin = (MapReducePlugin) plugin.newInstance();
        } catch (InstantiationException ex) {
            Rethrow.rethrow(ex);
        } catch (IllegalAccessException ex) {
            Rethrow.rethrow(ex);
        }

        mrPlugin.mapInit(this);

        Map<Long, Map<FeatureSet, Collection<Feature>>> map = new HashMap<Long, Map<FeatureSet, Collection<Feature>>>();
        for (FeatureSet set : inputSet) {
            for (Feature feature : set) {
                for (long i = feature.getStart(); i <= feature.getStop(); i++) {
                    if (!map.containsKey(i)) {
                        map.put(i, new HashMap<FeatureSet, Collection<Feature>>());
                    }
                    if (!map.get(i).containsKey(set)) {
                        map.get(i).put(set, new ArrayList<Feature>());
                    }
                    map.get(i).get(set).add(feature);
                }
            }
        }
        // mimic filtering 
        for (Entry<Long, Map<FeatureSet, Collection<Feature>>> e : map.entrySet()) {
            Map<FeatureSet, Collection<Feature>> innerMap = MRHBasePluginRunner
                    .handlePreFilteredPlugins(e.getValue(), mrPlugin, this.ext_parameters);
            // not sure what to do for position here
            mrPlugin.map(e.getKey(), innerMap, this);
            mrPlugin.mapCleanup();
        }

        mrPlugin.reduceInit();
        // TODO: make this pass through functional in order to simulate MapReduce
        for (Feature f : inputSet.iterator().next()) {
            mrPlugin.reduce(null, null, this);
        }
        mrPlugin.reduceCleanup();

        mrPlugin.cleanup();
    } else {
        throw new UnsupportedOperationException("Scan plugins not supported yet");
    }
}

From source file:com.google.cloud.bigtable.mapreduce.Import.java

License:Open Source License

/**
 * Add a Filter to be instantiated on import
 * @param conf Configuration to update (will be passed to the job)
 * @param clazz {@link Filter} subclass to instantiate on the server.
 * @param filterArgs List of arguments to pass to the filter on instantiation
 */
public static void addFilterAndArguments(Configuration conf, Class<? extends Filter> clazz,
        List<String> filterArgs) {
    conf.set(Import.FILTER_CLASS_CONF_KEY, clazz.getName());
    conf.setStrings(Import.FILTER_ARGS_CONF_KEY, filterArgs.toArray(new String[filterArgs.size()]));
}
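
A hedged sketch of the round trip: the job-side code can read the same keys straight back out of the Configuration. This assumes the FILTER_CLASS_CONF_KEY and FILTER_ARGS_CONF_KEY constants are accessible and uses a made-up PrefixFilter argument; it is an illustration, not the actual Import job code:

import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import com.google.cloud.bigtable.mapreduce.Import;

public class FilterConfigRoundTrip {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // store the filter class and its arguments (the argument value is made up)
        Import.addFilterAndArguments(conf, PrefixFilter.class, Arrays.asList("row-prefix"));

        // read back what set/setStrings stored
        String filterClassName = conf.get(Import.FILTER_CLASS_CONF_KEY);
        String[] filterArgs = conf.getStrings(Import.FILTER_ARGS_CONF_KEY);
        System.out.println(filterClassName + " with " + filterArgs.length + " argument(s)");
    }
}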

From source file:com.hortonworks.minicluster.MiniHadoopCluster.java

License:Apache License

@Override
public void serviceInit(Configuration conf) throws Exception {
    conf.setBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, true);
    conf.setStrings(YarnConfiguration.NM_AUX_SERVICES,
            new String[] { ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID });
    conf.setClass(
            String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID),
            ShuffleHandler.class, Service.class);
    conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0);

    this.addService(new ResourceManagerWrapper());
    for (int index = 0; index < this.nodeManagers.length; index++) {
        this.nodeManagers[index] = new ShortCircuitedNodeManager();
        this.addService(new NodeManagerWrapper(index));
    }
    super.serviceInit(conf instanceof YarnConfiguration ? conf : new YarnConfiguration(conf));
}

From source file:com.ibm.stocator.fs.commom.unittests.StocatorPathTest.java

License:Open Source License

@Before
public final void before() {
    mStocatorPath = PowerMockito.mock(StocatorPath.class);
    Whitebox.setInternalState(mStocatorPath, "tempIdentifiers", new String[] { pattern1 });
    Configuration conf = new Configuration();
    conf.setStrings("fs.stocator.temp.identifier", pattern1);
    stocPath = new StocatorPath(DEFAULT_FOUTPUTCOMMITTER_V1, conf, hostname);
}

From source file:com.ikanow.infinit.e.data_model.custom.InfiniteMongoConfigUtil.java

License:Apache License

public static void setSourceTags(Configuration conf, DBObject tags) {
    conf.setStrings(SOURCE_TAGS, tags.toString());
}

From source file:com.ikanow.infinit.e.data_model.custom.InfiniteMongoConfigUtil.java

License:Apache License

public static void setCacheList(Configuration conf, BasicDBList cacheList) {
    conf.setStrings(CACHE_LIST, cacheList.toString());
}

From source file:com.kenshoo.integrations.plugins.connectors.GCSFileProtocol.java

License:Apache License

@Override
public void setupAuthentication(GenericConfiguration genericConfiguration, Configuration configuration,
        String arg1, String arg2) {
    configuration.setBoolean("fs.gcsfs.impl.disable.cache", true);
    configuration.setStrings("fs.gcsfs.impl", "com.kenshoo.integrations.plugins.connectors.GCSFileSystem");

    String accessToken = genericConfiguration.getStringProperty(PROPERTY_KEY_ACCESS_TOKEN, null);
    if (accessToken != null) {
        configuration.setStrings(PROPERTY_KEY_ACCESS_TOKEN, accessToken);
    }
    String refreshToken = genericConfiguration.getStringProperty(PROPERTY_KEY_REFRESH_TOKEN, null);
    if (refreshToken != null) {
        configuration.setStrings(PROPERTY_KEY_REFRESH_TOKEN, refreshToken);
    }
    Long accessTokenCreationTime = genericConfiguration.getLongProperty(PROPERTY_KEY_ACCESS_TOKEN_CREATION_TIME,
            null);
    if (accessTokenCreationTime != null) {
        configuration.setLong(PROPERTY_KEY_ACCESS_TOKEN_CREATION_TIME, accessTokenCreationTime);
    }
    Long accessTokenExpirationAfter = genericConfiguration
            .getLongProperty(PROPERTY_KEY_ACCESS_TOKEN_EXPIRES_AFTER, null);
    if (accessTokenExpirationAfter != null) {
        configuration.setLong(PROPERTY_KEY_ACCESS_TOKEN_EXPIRES_AFTER, accessTokenExpirationAfter);
    }
    String relayURL = genericConfiguration.getStringProperty(PROPERTY_KEY_OAUTH_RELAY_URL, null);
    if (relayURL != null) {
        configuration.setStrings(PROPERTY_KEY_OAUTH_RELAY_URL, relayURL);
    }
    String clientId = genericConfiguration.getStringProperty(PROPERTY_KEY_OAUTH_CLIENT_ID, null);
    if (clientId != null) {
        configuration.setStrings(PROPERTY_KEY_OAUTH_CLIENT_ID, clientId);
    }
    String secret = genericConfiguration.getStringProperty(PROPERTY_KEY_OAUTH_SECRET, null);
    if (secret != null) {
        configuration.setStrings(PROPERTY_KEY_OAUTH_SECRET, secret);
    }
}
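
One caveat with the single-value calls above: on write, setStrings with one value behaves like a plain set, but getStrings always splits on commas, so a stored value that itself contains a comma comes back in pieces. A minimal sketch with a made-up property name:

import org.apache.hadoop.conf.Configuration;

// Sketch of the comma-splitting behaviour; "example.token" is a hypothetical key.
public class CommaSplitExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setStrings("example.token", "abc,def"); // one value that contains a comma

        System.out.println(conf.get("example.token"));               // prints: abc,def
        System.out.println(conf.getStrings("example.token").length); // prints: 2
    }
}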

From source file:com.lakala.hbase.IndexBuilder2.java

License:Apache License

/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    String columnFamily = args[1];
    System.out.println("****" + tableName);
    conf.set(TableInputFormat.SCAN, convertScanToString(new Scan()));
    conf.set(TableInputFormat.INPUT_TABLE, tableName);
    conf.set("index.tablename", tableName);
    conf.set("index.familyname", columnFamily);
    String[] fields = new String[args.length - 2];
    for (int i = 0; i < fields.length; i++) {
        fields[i] = args[i + 2];
    }
    conf.setStrings("index.fields", fields);
    conf.set("index.familyname", "attributes");
    Job job = new Job(conf, tableName);
    job.setJarByClass(IndexBuilder.class);
    job.setMapperClass(Map.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TableInputFormat.class);
    job.setOutputFormatClass(MultiTableOutputFormat.class);
    return job;
}