Example usage for org.apache.hadoop.mapreduce Job getJar

List of usage examples for org.apache.hadoop.mapreduce Job getJar

Introduction

This page collects example usages of the getJar() method of org.apache.hadoop.mapreduce.Job.

Prototype

public String getJar() 

Source Link

Document

Get the pathname of the job's jar.

Usage

From source file:org.apache.accumulo.server.test.randomwalk.shard.SortTool.java

License:Apache License

/**
 * Builds and runs the sort M/R job: reads {@code seqFile} through
 * {@code SequenceFileInputFormat}, partitions with {@code KeyRangePartitioner} using
 * {@code splitFile}, and writes through {@code AccumuloFileOutputFormat} to {@code outputDir}.
 *
 * @param args command-line arguments; not read by this method
 * @return 0 when the job reports success; 1 when no job jar is set or the job fails
 * @throws Exception if job setup or execution fails
 */
public int run(String[] args) throws Exception {
    final String jobName = this.getClass().getSimpleName();
    Job job = new Job(getConf(), jobName);
    job.setJarByClass(this.getClass());

    // M/R needs a jar; stop before any further configuration if none was resolved.
    final boolean jarMissing = job.getJar() == null;
    if (jarMissing) {
        log.error("M/R requires a jar file!  Run mvn package.");
        return 1;
    }

    // Input side: sequence files located at seqFile.
    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, seqFile);

    // Partitioning is driven by the split file.
    job.setPartitionerClass(KeyRangePartitioner.class);
    KeyRangePartitioner.setSplitFile(job, splitFile);

    // Intermediate records are Accumulo Key/Value pairs.
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);

    // One reducer more than the number of splits.
    final int reducerCount = splits.size() + 1;
    job.setNumReduceTasks(reducerCount);

    // Output side: Accumulo files under outputDir.
    job.setOutputFormatClass(AccumuloFileOutputFormat.class);
    AccumuloFileOutputFormat.setOutputPath(job, new Path(outputDir));

    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        return 0;
    }
    return 1;
}

From source file:org.apache.accumulo.test.randomwalk.multitable.CopyTool.java

License:Apache License

/**
 * Sets up and runs a map-only job that reads the Accumulo table {@code args[2]} with
 * {@code AccumuloInputFormat} and writes through {@code AccumuloOutputFormat} to the default
 * table {@code args[5]}, with table creation enabled.
 *
 * <p>Arguments as read by this method: {@code args[0]} principal and {@code args[1]} password
 * (both only used when SASL is disabled), {@code args[2]} input table, {@code args[3]} instance
 * name, {@code args[4]} ZooKeeper hosts, {@code args[5]} default output table.
 *
 * @param args positional arguments described above
 * @return 0 when the job reports success; 1 when no job jar is set or the job fails
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    final String jobName = this.getClass().getSimpleName();
    Job job = Job.getInstance(getConf(), jobName);
    job.setJarByClass(this.getClass());

    // M/R needs a jar; stop before any further configuration if none was resolved.
    final boolean jarMissing = job.getJar() == null;
    if (jarMissing) {
        log.error("M/R requires a jar file!  Run mvn package.");
        return 1;
    }

    ClientConfiguration zkConf = new ClientConfiguration()
            .withInstance(args[3])
            .withZkHosts(args[4]);

    // Input side: scan args[2] with empty authorizations.
    job.setInputFormatClass(AccumuloInputFormat.class);
    AccumuloInputFormat.setInputTableName(job, args[2]);
    AccumuloInputFormat.setScanAuthorizations(job, Authorizations.EMPTY);
    AccumuloInputFormat.setZooKeeperInstance(job, zkConf);

    final String connectorPrincipal;
    final AuthenticationToken connectorToken;
    final boolean saslEnabled = zkConf.getBoolean(ClientProperty.INSTANCE_RPC_SASL_ENABLED.getKey(), false);
    if (!saslEnabled) {
        // Plain principal/password credentials taken from the arguments.
        connectorPrincipal = args[0];
        connectorToken = new PasswordToken(args[1]);
    } else {
        // With SASL on, the current (already logged-in) user's Kerberos credentials are used to
        // request a delegation token for the job; args[0] and args[1] are not read on this path.
        KerberosToken kerberosToken = new KerberosToken();
        try {
            UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
            if (!currentUser.hasKerberosCredentials()) {
                throw new IllegalStateException("Expected current user to have Kerberos credentials");
            }

            // The principal comes from UGI rather than from the arguments.
            connectorPrincipal = currentUser.getUserName();

            // Connect with the Kerberos credentials.
            Connector connector = new ZooKeeperInstance(zkConf).getConnector(connectorPrincipal, kerberosToken);

            // Check up front that this user is allowed to obtain a delegation token.
            final boolean mayObtainToken = connector.securityOperations()
                    .hasSystemPermission(connector.whoami(), SystemPermission.OBTAIN_DELEGATION_TOKEN);
            if (!mayObtainToken) {
                log.error(connectorPrincipal + " doesn't have the " + SystemPermission.OBTAIN_DELEGATION_TOKEN.name()
                        + " SystemPermission neccesary to obtain a delegation token. MapReduce tasks cannot automatically use the client's"
                        + " credentials on remote servers. Delegation tokens provide a means to run MapReduce without distributing the user's credentials.");
                throw new IllegalStateException(
                        connector.whoami() + " does not have permission to obtain a delegation token");
            }

            // Ask Accumulo for the delegation token the job will use.
            connectorToken = connector.securityOperations().getDelegationToken(new DelegationTokenConfig());

        } catch (Exception e) {
            // Any failure above (including the checks) is logged and rethrown unchecked.
            final String msg = "Failed to acquire DelegationToken for use with MapReduce";
            log.error(msg, e);
            throw new RuntimeException(msg, e);
        }
    }

    // Both formats share the same credentials.
    AccumuloInputFormat.setConnectorInfo(job, connectorPrincipal, connectorToken);
    AccumuloOutputFormat.setConnectorInfo(job, connectorPrincipal, connectorToken);

    job.setMapperClass(SeqMapClass.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);

    // Map-only job.
    job.setNumReduceTasks(0);

    // Output side: default table args[5], created if missing.
    job.setOutputFormatClass(AccumuloOutputFormat.class);
    AccumuloOutputFormat.setCreateTables(job, true);
    AccumuloOutputFormat.setDefaultTableName(job, args[5]);
    AccumuloOutputFormat.setZooKeeperInstance(job, zkConf);

    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        return 0;
    }
    return 1;
}

From source file:org.apache.accumulo.test.randomwalk.sequential.MapRedVerifyTool.java

License:Apache License

/**
 * Sets up and runs a verification job with {@code SeqMapClass} as mapper and a single
 * {@code SeqReduceClass} reducer, reading the Accumulo table {@code args[2]} and writing through
 * {@code AccumuloOutputFormat} to the default table {@code args[5]}.
 *
 * <p>Arguments as read by this method: {@code args[0]} principal and {@code args[1]} password
 * (both only used when SASL is disabled), {@code args[2]} input table, {@code args[3]} instance
 * name, {@code args[4]} ZooKeeper hosts, {@code args[5]} default output table.
 *
 * @param args positional arguments described above
 * @return 0 when the job reports success; 1 when no job jar is set or the job fails
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    final String jobName = this.getClass().getSimpleName();
    Job job = Job.getInstance(getConf(), jobName);
    job.setJarByClass(this.getClass());

    // M/R needs a jar; stop before any further configuration if none was resolved.
    final boolean jarMissing = job.getJar() == null;
    if (jarMissing) {
        log.error("M/R requires a jar file!  Run mvn package.");
        return 1;
    }

    ClientConfiguration zkConf = ClientConfiguration.loadDefault()
            .withInstance(args[3])
            .withZkHosts(args[4]);

    // Table names and instance for both the input and the output format.
    AccumuloInputFormat.setInputTableName(job, args[2]);
    AccumuloInputFormat.setZooKeeperInstance(job, zkConf);
    AccumuloOutputFormat.setDefaultTableName(job, args[5]);
    AccumuloOutputFormat.setZooKeeperInstance(job, zkConf);

    job.setInputFormatClass(AccumuloInputFormat.class);

    final boolean saslEnabled = zkConf.getBoolean(ClientProperty.INSTANCE_RPC_SASL_ENABLED.getKey(), false);
    if (!saslEnabled) {
        // Plain principal/password credentials taken from the arguments.
        AccumuloInputFormat.setConnectorInfo(job, args[0], new PasswordToken(args[1]));
        AccumuloOutputFormat.setConnectorInfo(job, args[0], new PasswordToken(args[1]));
    } else {
        // With SASL on, the current user must already hold Kerberos credentials.
        KerberosToken kerberosToken = new KerberosToken();
        try {
            UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
            if (!currentUser.hasKerberosCredentials()) {
                throw new IllegalStateException("Expected current user to have Kerberos credentials");
            }

            String kerberosPrincipal = currentUser.getUserName();

            Connector connector = new ZooKeeperInstance(zkConf).getConnector(kerberosPrincipal, kerberosToken);

            // Check up front that this user is allowed to obtain a delegation token.
            final boolean mayObtainToken = connector.securityOperations()
                    .hasSystemPermission(connector.whoami(), SystemPermission.OBTAIN_DELEGATION_TOKEN);
            if (!mayObtainToken) {
                log.error(kerberosPrincipal + " doesn't have the " + SystemPermission.OBTAIN_DELEGATION_TOKEN.name()
                        + " SystemPermission neccesary to obtain a delegation token. MapReduce tasks cannot automatically use the client's"
                        + " credentials on remote servers. Delegation tokens provide a means to run MapReduce without distributing the user's credentials.");
                throw new IllegalStateException(
                        connector.whoami() + " does not have permission to obtain a delegation token");
            }

            // Ask Accumulo for a delegation token.
            AuthenticationToken delegationToken = connector.securityOperations()
                    .getDelegationToken(new DelegationTokenConfig());

            // The job is configured with the delegation token, not the Kerberos token.
            AccumuloInputFormat.setConnectorInfo(job, kerberosPrincipal, delegationToken);
            AccumuloOutputFormat.setConnectorInfo(job, kerberosPrincipal, delegationToken);
        } catch (Exception e) {
            // Any failure above (including the checks) is logged and rethrown unchecked.
            final String msg = "Failed to acquire DelegationToken for use with MapReduce";
            log.error(msg, e);
            throw new RuntimeException(msg, e);
        }
    }

    job.setMapperClass(SeqMapClass.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(IntWritable.class);

    // A single reducer handles all map output.
    job.setReducerClass(SeqReduceClass.class);
    job.setNumReduceTasks(1);

    job.setOutputFormatClass(AccumuloOutputFormat.class);
    AccumuloOutputFormat.setCreateTables(job, true);

    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        return 0;
    }
    return 1;
}

From source file:org.apache.accumulo.test.randomwalk.shard.SortTool.java

License:Apache License

/**
 * Builds and runs the sort M/R job: reads {@code seqFile} through
 * {@code SequenceFileInputFormat}, partitions with {@code KeyRangePartitioner} using
 * {@code splitFile}, and writes through {@code AccumuloFileOutputFormat} to {@code outputDir}.
 *
 * @param args command-line arguments; not read by this method
 * @return 0 when the job reports success; 1 when no job jar is set or the job fails
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    final String jobName = this.getClass().getSimpleName();
    Job job = Job.getInstance(getConf(), jobName);
    job.setJarByClass(this.getClass());

    // M/R needs a jar; stop before any further configuration if none was resolved.
    final boolean jarMissing = job.getJar() == null;
    if (jarMissing) {
        log.error("M/R requires a jar file!  Run mvn package.");
        return 1;
    }

    // Input side: sequence files located at seqFile.
    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, seqFile);

    // Partitioning is driven by the split file.
    job.setPartitionerClass(KeyRangePartitioner.class);
    KeyRangePartitioner.setSplitFile(job, splitFile);

    // Intermediate records are Accumulo Key/Value pairs.
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);

    // One reducer more than the number of splits.
    final int reducerCount = splits.size() + 1;
    job.setNumReduceTasks(reducerCount);

    // Output side: Accumulo files under outputDir.
    job.setOutputFormatClass(AccumuloFileOutputFormat.class);
    AccumuloFileOutputFormat.setOutputPath(job, new Path(outputDir));

    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        return 0;
    }
    return 1;
}

From source file:org.apache.accumulo.testing.core.randomwalk.multitable.CopyTool.java

License:Apache License

/**
 * Sets up and runs a map-only job that reads the Accumulo table {@code args[2]} with
 * {@code AccumuloInputFormat} and writes through {@code AccumuloOutputFormat} to the default
 * table {@code args[5]}, with table creation enabled.
 *
 * <p>Arguments as read by this method: {@code args[0]} principal and {@code args[1]} password
 * (both only used when SASL is disabled), {@code args[2]} input table, {@code args[3]} instance
 * name, {@code args[4]} ZooKeeper hosts, {@code args[5]} default output table.
 *
 * @param args positional arguments described above
 * @return 0 when the job reports success; 1 when no job jar is set or the job fails
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    final String jobName = this.getClass().getSimpleName();
    Job job = Job.getInstance(getConf(), jobName);
    job.setJarByClass(this.getClass());

    // M/R needs a jar; stop before any further configuration if none was resolved.
    final boolean jarMissing = job.getJar() == null;
    if (jarMissing) {
        log.error("M/R requires a jar file!  Run mvn package.");
        return 1;
    }

    ClientConfiguration zkConf = new ClientConfiguration()
            .withInstance(args[3])
            .withZkHosts(args[4]);

    // Input side: scan args[2] with empty authorizations.
    job.setInputFormatClass(AccumuloInputFormat.class);
    AccumuloInputFormat.setInputTableName(job, args[2]);
    AccumuloInputFormat.setScanAuthorizations(job, Authorizations.EMPTY);
    AccumuloInputFormat.setZooKeeperInstance(job, zkConf);

    final String connectorPrincipal;
    final AuthenticationToken connectorToken;
    final boolean saslEnabled = zkConf.getBoolean(ClientProperty.INSTANCE_RPC_SASL_ENABLED.getKey(), false);
    if (!saslEnabled) {
        // Plain principal/password credentials taken from the arguments.
        connectorPrincipal = args[0];
        connectorToken = new PasswordToken(args[1]);
    } else {
        // With SASL on, the current (already logged-in) user's Kerberos credentials are used to
        // request a delegation token for the job; args[0] and args[1] are not read on this path.
        KerberosToken kerberosToken = new KerberosToken();
        try {
            UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
            if (!currentUser.hasKerberosCredentials()) {
                throw new IllegalStateException("Expected current user to have Kerberos credentials");
            }

            // The principal comes from UGI rather than from the arguments.
            connectorPrincipal = currentUser.getUserName();

            // Connect with the Kerberos credentials.
            Connector connector = new ZooKeeperInstance(zkConf).getConnector(connectorPrincipal, kerberosToken);

            // Check up front that this user is allowed to obtain a delegation token.
            final boolean mayObtainToken = connector.securityOperations()
                    .hasSystemPermission(connector.whoami(), SystemPermission.OBTAIN_DELEGATION_TOKEN);
            if (!mayObtainToken) {
                log.error(connectorPrincipal + " doesn't have the " + SystemPermission.OBTAIN_DELEGATION_TOKEN.name()
                        + " SystemPermission neccesary to obtain a delegation token. MapReduce tasks cannot automatically use the client's"
                        + " credentials on remote servers. Delegation tokens provide a means to run MapReduce without distributing the user's credentials.");
                throw new IllegalStateException(
                        connector.whoami() + " does not have permission to obtain a delegation token");
            }

            // Ask Accumulo for the delegation token the job will use.
            connectorToken = connector.securityOperations().getDelegationToken(new DelegationTokenConfig());

        } catch (Exception e) {
            // Any failure above (including the checks) is logged and rethrown unchecked.
            final String msg = "Failed to acquire DelegationToken for use with MapReduce";
            log.error(msg, e);
            throw new RuntimeException(msg, e);
        }
    }

    // Both formats share the same credentials.
    AccumuloInputFormat.setConnectorInfo(job, connectorPrincipal, connectorToken);
    AccumuloOutputFormat.setConnectorInfo(job, connectorPrincipal, connectorToken);

    job.setMapperClass(SeqMapClass.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);

    // Map-only job.
    job.setNumReduceTasks(0);

    // Output side: default table args[5], created if missing.
    job.setOutputFormatClass(AccumuloOutputFormat.class);
    AccumuloOutputFormat.setCreateTables(job, true);
    AccumuloOutputFormat.setDefaultTableName(job, args[5]);
    AccumuloOutputFormat.setZooKeeperInstance(job, zkConf);

    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        return 0;
    }
    return 1;
}

From source file:org.apache.accumulo.testing.core.randomwalk.sequential.MapRedVerifyTool.java

License:Apache License

/**
 * Sets up and runs a verification job with {@code SeqMapClass} as mapper and a single
 * {@code SeqReduceClass} reducer, reading the Accumulo table {@code args[2]} and writing through
 * {@code AccumuloOutputFormat} to the default table {@code args[5]}.
 *
 * <p>Arguments as read by this method: {@code args[0]} principal and {@code args[1]} password
 * (both only used when SASL is disabled), {@code args[2]} input table, {@code args[3]} instance
 * name, {@code args[4]} ZooKeeper hosts, {@code args[5]} default output table.
 *
 * @param args positional arguments described above
 * @return 0 when the job reports success; 1 when no job jar is set or the job fails
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    final String jobName = this.getClass().getSimpleName();
    Job job = Job.getInstance(getConf(), jobName);
    job.setJarByClass(this.getClass());

    // M/R needs a jar; stop before any further configuration if none was resolved.
    final boolean jarMissing = job.getJar() == null;
    if (jarMissing) {
        log.error("M/R requires a jar file!  Run mvn package.");
        return 1;
    }

    ClientConfiguration zkConf = ClientConfiguration.loadDefault()
            .withInstance(args[3])
            .withZkHosts(args[4]);

    // Table names and instance for both the input and the output format.
    AccumuloInputFormat.setInputTableName(job, args[2]);
    AccumuloInputFormat.setZooKeeperInstance(job, zkConf);
    AccumuloOutputFormat.setDefaultTableName(job, args[5]);
    AccumuloOutputFormat.setZooKeeperInstance(job, zkConf);

    job.setInputFormatClass(AccumuloInputFormat.class);

    final boolean saslEnabled = zkConf.getBoolean(ClientProperty.INSTANCE_RPC_SASL_ENABLED.getKey(), false);
    if (!saslEnabled) {
        // Plain principal/password credentials taken from the arguments.
        AccumuloInputFormat.setConnectorInfo(job, args[0], new PasswordToken(args[1]));
        AccumuloOutputFormat.setConnectorInfo(job, args[0], new PasswordToken(args[1]));
    } else {
        // With SASL on, the current user must already hold Kerberos credentials.
        KerberosToken kerberosToken = new KerberosToken();
        try {
            UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
            if (!currentUser.hasKerberosCredentials()) {
                throw new IllegalStateException("Expected current user to have Kerberos credentials");
            }

            String kerberosPrincipal = currentUser.getUserName();

            Connector connector = new ZooKeeperInstance(zkConf).getConnector(kerberosPrincipal, kerberosToken);

            // Check up front that this user is allowed to obtain a delegation token.
            final boolean mayObtainToken = connector.securityOperations()
                    .hasSystemPermission(connector.whoami(), SystemPermission.OBTAIN_DELEGATION_TOKEN);
            if (!mayObtainToken) {
                log.error(kerberosPrincipal + " doesn't have the " + SystemPermission.OBTAIN_DELEGATION_TOKEN.name()
                        + " SystemPermission neccesary to obtain a delegation token. MapReduce tasks cannot automatically use the client's"
                        + " credentials on remote servers. Delegation tokens provide a means to run MapReduce without distributing the user's credentials.");
                throw new IllegalStateException(
                        connector.whoami() + " does not have permission to obtain a delegation token");
            }

            // Ask Accumulo for a delegation token.
            AuthenticationToken delegationToken = connector.securityOperations()
                    .getDelegationToken(new DelegationTokenConfig());

            // The job is configured with the delegation token, not the Kerberos token.
            AccumuloInputFormat.setConnectorInfo(job, kerberosPrincipal, delegationToken);
            AccumuloOutputFormat.setConnectorInfo(job, kerberosPrincipal, delegationToken);
        } catch (Exception e) {
            // Any failure above (including the checks) is logged and rethrown unchecked.
            final String msg = "Failed to acquire DelegationToken for use with MapReduce";
            log.error(msg, e);
            throw new RuntimeException(msg, e);
        }
    }

    job.setMapperClass(SeqMapClass.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(IntWritable.class);

    // A single reducer handles all map output.
    job.setReducerClass(SeqReduceClass.class);
    job.setNumReduceTasks(1);

    job.setOutputFormatClass(AccumuloOutputFormat.class);
    AccumuloOutputFormat.setCreateTables(job, true);

    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        return 0;
    }
    return 1;
}

From source file:org.apache.giraph.job.GiraphJob.java

License:Apache License

/**
 * Runs the actual graph application through Hadoop Map-Reduce.
 *
 * <p>Adjusts several settings on the Giraph configuration, then repeatedly builds, submits and
 * waits on a map-only job until it passes or the configured retry checker declines both a
 * checkpoint restart and a retry.
 *
 * @param verbose If true, provide verbose output, false otherwise
 * @return True if success, false otherwise
 * @throws ClassNotFoundException propagated from the underlying Hadoop job calls
 * @throws InterruptedException propagated from the underlying Hadoop job calls
 * @throws IOException propagated from the underlying Hadoop job calls
 */
public final boolean run(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException {
    // Most users won't hit this hopefully and can set it higher if desired
    setIntConfIfDefault("mapreduce.job.counters.limit", 512);

    // Capacity scheduler-specific settings.  These should be enough for
    // a reasonable Giraph job
    setIntConfIfDefault("mapred.job.map.memory.mb", 1024);
    setIntConfIfDefault("mapred.job.reduce.memory.mb", 0);

    // Speculative execution doesn't make sense for Giraph
    giraphConfiguration.setBoolean("mapred.map.tasks.speculative.execution", false);

    // Set the ping interval to 5 minutes instead of one minute
    // (DEFAULT_PING_INTERVAL)
    Client.setPingInterval(giraphConfiguration, 60000 * 5);

    // Should work in MAPREDUCE-1938 to let the user jars/classes
    // get loaded first
    // (both property names are set; presumably old and new spellings of the same switch)
    giraphConfiguration.setBoolean("mapreduce.user.classpath.first", true);
    giraphConfiguration.setBoolean("mapreduce.job.user.classpath.first", true);

    // If the checkpoint frequency is 0 (no failure handling), set the max
    // tasks attempts to be 0 to encourage faster failure of unrecoverable jobs
    if (giraphConfiguration.getCheckpointFrequency() == 0) {
        // Remember the previous value only so it can be reported in the log line below
        int oldMaxTaskAttempts = giraphConfiguration.getMaxTaskAttempts();
        giraphConfiguration.setMaxTaskAttempts(0);
        if (LOG.isInfoEnabled()) {
            LOG.info("run: Since checkpointing is disabled (default), "
                    + "do not allow any task retries (setting " + GiraphConstants.MAX_TASK_ATTEMPTS.getKey()
                    + " = 0, " + "old value = " + oldMaxTaskAttempts + ")");
        }
    }

    // Set the job properties, check them, and submit the job
    ImmutableClassesGiraphConfiguration conf = new ImmutableClassesGiraphConfiguration(giraphConfiguration);
    checkLocalJobRunnerConfiguration(conf);

    int tryCount = 0;
    GiraphJobRetryChecker retryChecker = conf.getJobRetryChecker();
    // Submit-and-wait loop: every pass builds a fresh Job and a fresh progress tracker;
    // the only exit is the return further down.
    while (true) {
        JobProgressTrackerService jobProgressTrackerService = JobProgressTrackerService
                .createJobProgressServer(conf);

        // Counts every pass through the loop, including checkpoint restarts
        tryCount++;
        Job submittedJob = new Job(conf, jobName);
        // Only fall back to this class's jar when no jar is already set on the job
        if (submittedJob.getJar() == null) {
            submittedJob.setJarByClass(getClass());
        }
        // Map-only job
        submittedJob.setNumReduceTasks(0);
        submittedJob.setMapperClass(GraphMapper.class);
        submittedJob.setInputFormatClass(BspInputFormat.class);
        submittedJob.setOutputFormatClass(BspOutputFormat.class);
        // The tracker is null-checked wherever it is used in this loop
        if (jobProgressTrackerService != null) {
            jobProgressTrackerService.setJob(submittedJob);
        }

        // Observer callbacks bracket the job: before submit, after submit, after completion
        GiraphJobObserver jobObserver = conf.getJobObserver();
        jobObserver.launchingJob(submittedJob);
        submittedJob.submit();
        if (LOG.isInfoEnabled()) {
            LOG.info("Tracking URL: " + submittedJob.getTrackingURL());
            LOG.info("Waiting for resources... Job will start only when it gets all "
                    + (conf.getMinWorkers() + 1) + " mappers");
        }
        jobObserver.jobRunning(submittedJob);
        HaltApplicationUtils.printHaltInfo(submittedJob, conf);

        boolean passed = submittedJob.waitForCompletion(verbose);
        if (jobProgressTrackerService != null) {
            jobProgressTrackerService.stop(passed);
        }
        jobObserver.jobFinished(submittedJob, passed);

        if (!passed) {
            // A checkpoint restart takes precedence over the plain retry decision below:
            // when the checker names a job to restart from, loop again without
            // consulting shouldRetry.
            String restartFrom = retryChecker.shouldRestartCheckpoint(submittedJob);
            if (restartFrom != null) {
                GiraphConstants.RESTART_JOB_ID.set(conf, restartFrom);
                continue;
            }
        }

        // Done on success, or on failure once the retry checker declines another attempt
        if (passed || !retryChecker.shouldRetry(submittedJob, tryCount)) {
            return passed;
        }
        if (LOG.isInfoEnabled()) {
            LOG.info("run: Retrying job, " + tryCount + " try");
        }
    }
}

From source file:org.apache.phoenix.mapreduce.CsvBulkLoadTool.java

License:Apache License

/**
 * Registers {@code CsvToKeyValueMapper} as the job's mapper and, when the job has no jar set,
 * uses the jar containing that mapper class.
 *
 * @param job the job being configured
 */
@Override
protected void setupJob(Job job) {
    // A jar that is already set (e.g. via a -D system property at startup) is kept as is;
    // the mapper's jar is only the fallback.
    final String configuredJar = job.getJar();
    if (configuredJar == null) {
        job.setJarByClass(CsvToKeyValueMapper.class);
    }

    job.setMapperClass(CsvToKeyValueMapper.class);
}

From source file:org.apache.phoenix.mapreduce.JsonBulkLoadTool.java

License:Apache License

/**
 * Registers {@code JsonToKeyValueMapper} as the job's mapper and, when the job has no jar set,
 * uses the jar containing that mapper class.
 *
 * @param job the job being configured
 */
@Override
protected void setupJob(Job job) {
    // A jar that is already set (e.g. via a -D system property at startup) is kept as is;
    // the mapper's jar is only the fallback.
    final String configuredJar = job.getJar();
    if (configuredJar == null) {
        job.setJarByClass(JsonToKeyValueMapper.class);
    }

    job.setMapperClass(JsonToKeyValueMapper.class);
}

From source file:org.apache.phoenix.mapreduce.RegexBulkLoadTool.java

License:Apache License

/**
 * Registers {@code RegexToKeyValueMapper} as the job's mapper and, when the job has no jar set,
 * uses the jar containing that mapper class.
 *
 * @param job the job being configured
 */
@Override
protected void setupJob(Job job) {
    // A jar that is already set (e.g. via a -D system property at startup) is kept as is;
    // the mapper's jar is only the fallback.
    final String configuredJar = job.getJar();
    if (configuredJar == null) {
        job.setJarByClass(RegexToKeyValueMapper.class);
    }

    job.setMapperClass(RegexToKeyValueMapper.class);
}