Example usage for org.apache.hadoop.conf Configuration setIfUnset

List of usage examples for org.apache.hadoop.conf Configuration setIfUnset

Introduction

This page collects usage examples for org.apache.hadoop.conf.Configuration#setIfUnset from open-source projects.

Prototype

public synchronized void setIfUnset(String name, String value) 

Document

Sets a property if it is currently unset.

Usage
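
Before the project examples below, here is a minimal, self-contained sketch of the behavior (not taken from any of the sources listed; the property name my.app.buffer.size is made up for illustration). setIfUnset writes the value only when the property has no value yet, so it is typically used to supply a default without overwriting a value the user has already configured.

import org.apache.hadoop.conf.Configuration;

public class SetIfUnsetExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // The property is not set yet, so setIfUnset stores the default value.
        conf.setIfUnset("my.app.buffer.size", "4096");
        System.out.println(conf.get("my.app.buffer.size")); // prints 4096

        // set() always overwrites.
        conf.set("my.app.buffer.size", "8192");

        // The property already has a value, so this call is a no-op.
        conf.setIfUnset("my.app.buffer.size", "1024");
        System.out.println(conf.get("my.app.buffer.size")); // prints 8192
    }
}

The project examples below follow the same pattern: setIfUnset supplies defaults (job names, ports, hostnames, parser settings), while set forces a value.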

From source file:com.mellanox.r4h.MiniDFSCluster.java

License:Apache License

/**
 * Modify the config and start up additional DataNodes. The info port for
 * DataNodes is guaranteed to use a free port.
 *
 * Data nodes can run with the name node in the mini cluster or
 * a real name node. For example, running with a real name node is useful
 * when running simulated data nodes with a real name node.
 * If the minicluster's name node is null, assume that the conf has been
 * set with the right address:port of the name node.
 *
 * @param conf
 *            the base configuration to use in starting the DataNodes. This
 *            will be modified as necessary.
 * @param numDataNodes
 *            Number of DataNodes to start; may be zero
 * @param manageDfsDirs
 *            if true, the data directories for DataNodes will be
 *            created and {@link #DFS_DATANODE_DATA_DIR_KEY} will be
 *            set in the conf
 * @param operation
 *            the operation with which to start the DataNodes. If null
 *            or StartupOption.FORMAT, then StartupOption.REGULAR will be used.
 * @param racks
 *            array of strings indicating the rack that each DataNode is on
 * @param hosts
 *            array of strings indicating the hostnames for each DataNode
 * @param simulatedCapacities
 *            array of capacities of the simulated data nodes
 * @param setupHostsFile
 *            add new nodes to dfs hosts files
 * @param checkDataNodeAddrConfig
 *            if true, only set DataNode port addresses if not already set in config
 * @param checkDataNodeHostConfig
 *            if true, only set DataNode hostname key if not already set in config
 * @param dnConfOverlays
 *            An array of {@link Configuration} objects that will overlay the
 *            global MiniDFSCluster Configuration for the corresponding DataNode.
 * @throws IllegalStateException
 *             if NameNode has been shutdown
 */
public synchronized void startDataNodes(Configuration conf, int numDataNodes, StorageType storageType,
        boolean manageDfsDirs, StartupOption operation, String[] racks, String[] hosts,
        long[] simulatedCapacities, boolean setupHostsFile, boolean checkDataNodeAddrConfig,
        boolean checkDataNodeHostConfig, Configuration[] dnConfOverlays) throws IOException {
    if (operation == StartupOption.RECOVER) {
        return;
    }
    if (checkDataNodeHostConfig) {
        conf.setIfUnset(DFS_DATANODE_HOST_NAME_KEY, "127.0.0.1");
    } else {
        conf.set(DFS_DATANODE_HOST_NAME_KEY, "127.0.0.1");
    }

    int curDatanodesNum = dataNodes.size();
    // for the minicluster, the default initial delay for block reports (BRs) is 0
    if (conf.get(DFS_BLOCKREPORT_INITIAL_DELAY_KEY) == null) {
        conf.setLong(DFS_BLOCKREPORT_INITIAL_DELAY_KEY, 0);
    }
    // If the minicluster's name node is null, assume that the conf has been
    // set with the right address:port of the name node.
    //
    if (racks != null && numDataNodes > racks.length) {
        throw new IllegalArgumentException("The length of racks [" + racks.length
                + "] is less than the number of datanodes [" + numDataNodes + "].");
    }
    if (hosts != null && numDataNodes > hosts.length) {
        throw new IllegalArgumentException("The length of hosts [" + hosts.length
                + "] is less than the number of datanodes [" + numDataNodes + "].");
    }
    // Generate some hostnames if required
    if (racks != null && hosts == null) {
        hosts = new String[numDataNodes];
        for (int i = curDatanodesNum; i < curDatanodesNum + numDataNodes; i++) {
            hosts[i - curDatanodesNum] = "host" + i + ".foo.com";
        }
    }

    if (simulatedCapacities != null && numDataNodes > simulatedCapacities.length) {
        throw new IllegalArgumentException("The length of simulatedCapacities [" + simulatedCapacities.length
                + "] is less than the number of datanodes [" + numDataNodes + "].");
    }

    if (dnConfOverlays != null && numDataNodes > dnConfOverlays.length) {
        throw new IllegalArgumentException("The length of dnConfOverlays [" + dnConfOverlays.length
                + "] is less than the number of datanodes [" + numDataNodes + "].");
    }

    String[] dnArgs = (operation == null || operation != StartupOption.ROLLBACK) ? null
            : new String[] { operation.getName() };

    for (int i = curDatanodesNum; i < curDatanodesNum + numDataNodes; i++) {
        Configuration dnConf = new HdfsConfiguration(conf);
        if (dnConfOverlays != null) {
            dnConf.addResource(dnConfOverlays[i]);
        }
        // Set up datanode address
        setupDatanodeAddress(dnConf, setupHostsFile, checkDataNodeAddrConfig);
        if (manageDfsDirs) {
            String dirs = makeDataNodeDirs(i, storageType);
            dnConf.set(DFS_DATANODE_DATA_DIR_KEY, dirs);
            conf.set(DFS_DATANODE_DATA_DIR_KEY, dirs);
        }
        if (simulatedCapacities != null) {
            SimulatedFSDataset.setFactory(dnConf);
            dnConf.setLong(SimulatedFSDataset.CONFIG_PROPERTY_CAPACITY,
                    simulatedCapacities[i - curDatanodesNum]);
        }
        LOG.info("Starting DataNode " + i + " with " + DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY + ": "
                + dnConf.get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY));
        if (hosts != null) {
            dnConf.set(DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY, hosts[i - curDatanodesNum]);
            LOG.info("Starting DataNode " + i + " with hostname set to: "
                    + dnConf.get(DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY));
        }
        if (racks != null) {
            String name = hosts[i - curDatanodesNum];
            LOG.info("Adding node with hostname : " + name + " to rack " + racks[i - curDatanodesNum]);
            StaticMapping.addNodeToRack(name, racks[i - curDatanodesNum]);
        }
        Configuration newconf = new HdfsConfiguration(dnConf); // save config
        if (hosts != null) {
            NetUtils.addStaticResolution(hosts[i - curDatanodesNum], "localhost");
        }

        SecureResources secureResources = null;
        if (UserGroupInformation.isSecurityEnabled() && conf.get(DFS_DATA_TRANSFER_PROTECTION_KEY) == null) {
            try {
                secureResources = SecureDataNodeStarter.getSecureResources(dnConf);
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }
        final int maxRetriesOnSasl = conf.getInt(IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SASL_KEY,
                IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SASL_DEFAULT);
        int numRetries = 0;
        DataNode dn = null;
        while (true) {
            try {
                dn = DataNode.instantiateDataNode(dnArgs, dnConf, secureResources);
                break;
            } catch (IOException e) {
                // Work around issue testing security where rapidly starting multiple
                // DataNodes using the same principal gets rejected by the KDC as a
                // replay attack.
                if (UserGroupInformation.isSecurityEnabled() && numRetries < maxRetriesOnSasl) {
                    try {
                        Thread.sleep(1000);
                    } catch (InterruptedException ie) {
                        Thread.currentThread().interrupt();
                        break;
                    }
                    ++numRetries;
                    continue;
                }
                throw e;
            }
        }
        if (dn == null)
            throw new IOException("Cannot start DataNode in " + dnConf.get(DFS_DATANODE_DATA_DIR_KEY));
        // since the HDFS does things based on host|ip:port, we need to add the
        // mapping for the service to rackId
        String service = SecurityUtil.buildTokenService(dn.getXferAddress()).toString();
        if (racks != null) {
            LOG.info("Adding node with service : " + service + " to rack " + racks[i - curDatanodesNum]);
            StaticMapping.addNodeToRack(service, racks[i - curDatanodesNum]);
        }
        dn.runDatanodeDaemon();
        dataNodes.add(new DataNodeProperties(dn, newconf, dnArgs, secureResources, dn.getIpcPort()));
    }
    curDatanodesNum += numDataNodes;
    this.numDataNodes += numDataNodes;
    waitActive();
}

From source file:com.mellanox.r4h.MiniDFSCluster.java

License:Apache License

protected void setupDatanodeAddress(Configuration conf, boolean setupHostsFile, boolean checkDataNodeAddrConfig)
        throws IOException {
    if (setupHostsFile) {
        String hostsFile = conf.get(DFS_HOSTS, "").trim();
        if (hostsFile.length() == 0) {
            throw new IOException("Parameter dfs.hosts is not setup in conf");
        }
        // Setup datanode in the include file, if it is defined in the conf
        String address = "127.0.0.1:" + NetUtils.getFreeSocketPort();
        if (checkDataNodeAddrConfig) {
            conf.setIfUnset(DFS_DATANODE_ADDRESS_KEY, address);
        } else {
            conf.set(DFS_DATANODE_ADDRESS_KEY, address);
        }
        addToFile(hostsFile, address);
        LOG.info("Adding datanode " + address + " to hosts file " + hostsFile);
    } else {
        if (checkDataNodeAddrConfig) {
            conf.setIfUnset(DFS_DATANODE_ADDRESS_KEY, "127.0.0.1:0");
        } else {
            conf.set(DFS_DATANODE_ADDRESS_KEY, "127.0.0.1:0");
        }
    }
    if (checkDataNodeAddrConfig) {
        conf.setIfUnset(DFS_DATANODE_HTTP_ADDRESS_KEY, "127.0.0.1:0");
        conf.setIfUnset(DFS_DATANODE_IPC_ADDRESS_KEY, "127.0.0.1:0");
    } else {
        conf.set(DFS_DATANODE_HTTP_ADDRESS_KEY, "127.0.0.1:0");
        conf.set(DFS_DATANODE_IPC_ADDRESS_KEY, "127.0.0.1:0");
    }
}

From source file:com.rim.logdriver.util.Cat.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 2) {
        System.out.println("usage: [genericOptions] input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    for (int i = 0; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Cat.class);
    jobConf.setIfUnset("mapred.job.name", "Cat Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(CatMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file:com.rim.logdriver.util.FastSearch.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchString = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchString input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchString = args[0];
    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(FastSearch.class);
    jobConf.setIfUnset("mapred.job.name", "Search Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string", Base64.encodeBase64String(searchString.getBytes("UTF-8")));

    job.setInputFormatClass(AvroBlockInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    // And set the output as usual
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        AvroBlockInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file:com.rim.logdriver.util.Grep.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String regex = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] regex input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    regex = args[0];
    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Grep.class);
    jobConf.setIfUnset("mapred.job.name", "Grep Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.grep.regex", Base64.encodeBase64String(regex.getBytes("UTF-8")));

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(GrepMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    // And set the output as usual
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }

}

From source file:com.rim.logdriver.util.MultiSearch.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchStringDir = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchStringDirectory input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchStringDir = args[0];
    // We are going to be reading all the files in this directory a lot. So
    // let's up the replication factor by a lot so that they're easy to read.
    for (FileStatus f : fs.listStatus(new Path(searchStringDir))) {
        fs.setReplication(f.getPath(), (short) 16);
    }

    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }

    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(MultiSearch.class);
    jobConf.setIfUnset("mapred.job.name", "MultiSearch");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string.dir", searchStringDir);

    // This search is generally too fast to make good use of 128MB blocks, so
    // let's set the value to 256MB (if it's not set already)
    if (jobConf.get("mapred.max.split.size") == null) {
        jobConf.setLong("mapred.max.split.size", 256 * 1024 * 1024);
    }

    job.setInputFormatClass(AvroBlockInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        AvroBlockInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file:com.rim.logdriver.util.Search.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchString = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchString input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchString = args[0];
    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Search.class);
    jobConf.setIfUnset("mapred.job.name", "Search Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string", searchString);

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    // And set the output as usual
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file:com.spotify.ratatool.GcsConfiguration.java

License:Apache License

public static Configuration get() {
    Configuration conf = new Configuration();
    File cloudConfigPath;
    if (isWindows()) {
        cloudConfigPath = new File(getEnvironment().get("APPDATA"), "gcloud");
    } else {
        cloudConfigPath = new File(System.getProperty("user.home"), ".config/gcloud");
    }
    File credentialFilePath = new File(cloudConfigPath, "application_default_credentials.json");
    if (!credentialFilePath.exists()) {
        return conf;
    }

    try {
        JsonFactory jsonFactory = Utils.getDefaultJsonFactory();
        InputStream inputStream = new FileInputStream(credentialFilePath);
        JsonObjectParser parser = new JsonObjectParser(jsonFactory);
        GenericJson fileContents = parser.parseAndClose(inputStream, Charsets.UTF_8, GenericJson.class);
        String fileType = (String) fileContents.get("type");
        if ("authorized_user".equals(fileType)) {
            String clientId = (String) fileContents.get("client_id");
            String clientSecret = (String) fileContents.get("client_secret");
            if (clientId != null && clientSecret != null) {
                LOG.debug("Using GCP user credential from '{}'", credentialFilePath);
                conf.setIfUnset("fs.gs.impl", GoogleHadoopFileSystem.class.getName());
                conf.setIfUnset("fs.AbstractFileSystem.gs.impl", GoogleHadoopFS.class.getName());
                conf.setIfUnset(GoogleHadoopFileSystemBase.GCS_PROJECT_ID_KEY, defaultProject());
                conf.setIfUnset(GoogleHadoopFileSystemBase.GCS_WORKING_DIRECTORY_KEY, "/hadoop");

                conf.setIfUnset(HadoopCredentialConfiguration.BASE_KEY_PREFIX
                        + HadoopCredentialConfiguration.ENABLE_SERVICE_ACCOUNTS_SUFFIX, "false");
                conf.setIfUnset(HadoopCredentialConfiguration.BASE_KEY_PREFIX
                        + HadoopCredentialConfiguration.CLIENT_ID_SUFFIX, clientId);
                conf.setIfUnset(HadoopCredentialConfiguration.BASE_KEY_PREFIX
                        + HadoopCredentialConfiguration.CLIENT_SECRET_SUFFIX, clientSecret);
            }
        }
    } catch (IOException e) {
        LOG.warn("Failed to load GCP user credential from '{}'", credentialFilePath);
    }
    return conf;
}

From source file:com.yahoo.labs.yamall.hadoop.Test.java

License:Open Source License

/**
 * Run the map/reduce job
 */
public final int run(final String[] args) throws Exception {

    startLogger(Level.INFO);

    Configuration conf = getConf();
    conf.set("yamall.vw_model", args[2]);
    conf.setIfUnset("yamall.bit_precision", "18");
    conf.setIfUnset("yamall.parser", "vw");

    // Print to screen all the options
    TreeMap<String, String> map = new TreeMap<String, String>();
    for (Map.Entry<String, String> entry : conf) {
        map.put(entry.getKey(), entry.getValue());
    }
    for (Map.Entry<String, String> entry : map.entrySet()) {
        System.out.printf("%s=%s\n", entry.getKey(), entry.getValue());
    }

    Job job = Job.getInstance(conf, "Yamall Test on MapReduce");
    job.setNumReduceTasks(1);
    job.setJarByClass(Test.class);
    job.setMapperClass(TestMapper.class);
    job.setMapOutputKeyClass(DoubleWritable.class);
    job.setReducerClass(TestReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(CompositeDoubleTextWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    MultipleOutputs.addNamedOutput(job, "out", TextOutputFormat.class, NullWritable.class, Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.yahoo.labs.yamall.hadoop.Train.java

License:Open Source License

/**
 * Run the map/reduce job
 */
public final int run(final String[] args) throws Exception {

    startLogger(Level.INFO);

    Configuration conf = getConf();
    conf.set("yamall.output", args[1]);
    conf.setIfUnset("yamall.bit_precision", "18");
    conf.setIfUnset("yamall.parser", "vw");

    // Print to screen all the options
    TreeMap<String, String> map = new TreeMap<String, String>();
    for (Map.Entry<String, String> entry : conf) {
        map.put(entry.getKey(), entry.getValue());
    }
    for (Map.Entry<String, String> entry : map.entrySet()) {
        System.out.printf("%s=%s\n", entry.getKey(), entry.getValue());
    }

    Job job = Job.getInstance(conf, "Yamall Train on MapReduce");
    job.setNumReduceTasks(1); // important
    job.setJarByClass(Train.class);
    job.setMapperClass(TrainMapper.class);
    job.setMapOutputKeyClass(DoubleWritable.class);
    job.setMapOutputValueClass(InstanceOrHashMapWritable.class);
    job.setReducerClass(TrainReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}