Example usage for org.apache.hadoop.conf Configuration setLong

List of usage examples for org.apache.hadoop.conf Configuration setLong

Introduction

On this page you can find example usages of org.apache.hadoop.conf Configuration setLong.

Prototype

public void setLong(String name, long value) 

Document

Set the value of the name property to a long.
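
A minimal usage sketch (the property name "my.app.cache.bytes" and the values below are hypothetical, chosen only for illustration):

import org.apache.hadoop.conf.Configuration;

public class SetLongExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Store a long value under a property name of our choosing (hypothetical key).
        conf.setLong("my.app.cache.bytes", 64L * 1024 * 1024);

        // Read it back with getLong; the second argument is the default
        // returned when the property is not set.
        long cacheBytes = conf.getLong("my.app.cache.bytes", 0L);
        System.out.println("my.app.cache.bytes = " + cacheBytes); // 67108864
    }
}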

Usage

From source file:com.placeiq.piqconnect.InitialVectorGenerator.java

License:Apache License

private Job buildJob() throws Exception {
    Configuration conf = getConf();
    conf.setLong("numberOfNodes", numberOfNodes);

    Job job = new Job(conf, "data-piqid.piqconnect.ConCmptIVGen_Stage1");
    job.setJarByClass(InitialVectorGenerator.class);
    job.setMapperClass(_Mapper.class);
    job.setReducerClass(_Reducer.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setOutputKeyClass(VLongWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, pathBitmask);
    FileOutputFormat.setOutputPath(job, pathVector);
    FileOutputFormat.setCompressOutput(job, true);

    return job;
}

From source file:com.rim.logdriver.mapreduce.avro.AvroBlockInputFormat.java

License:Apache License

/**
 * Creates a new AvroBlockRecordReader.
 * 
 * Increases the default value of mapreduce.job.max.split.locations to 100000,
 * if it's not already set.
 * 
 * Also sets mapred.max.split.size to the default block size for the root
 * directory ("/"), if it's not already set.
 * 
 * @param split
 *          The InputSplit.
 * @param context
 *          The TaskAttemptContext.
 * @return A new AvroBlockRecordReader.
 * @throws IOException
 *           If there is an I/O error.
 */
@Override
public RecordReader<AvroFileHeader, BytesWritable> createRecordReader(InputSplit split,
        TaskAttemptContext context) throws IOException {
    Configuration conf = context.getConfiguration();

    // Ensure we have sensible defaults for how we build blocks.
    if (conf.get("mapreduce.job.max.split.locations") == null) {
        conf.setLong("mapreduce.job.max.split.locations", MAX_SPLIT_LOCATIONS);
    }
    if (conf.get("mapred.max.split.size") == null) {
        // Try to set the split size to the default block size. In case of
        // failure, we'll use this 128MB default.
        long blockSize = 128 * 1024 * 1024; // 128MB
        try {
            blockSize = FileSystem.get(conf).getDefaultBlockSize();
        } catch (IOException e) {
            LOG.error("Error getting filesystem to get get default block size (this does not bode well).");
        }
        conf.setLong("mapred.max.split.size", blockSize);
    }

    return new AvroBlockRecordReader();
}

From source file:com.rim.logdriver.mapreduce.boom.BoomInputFormat.java

License:Apache License

public List<InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    // Ensure we have sensible defaults for how we build blocks.
    if (conf.get("mapreduce.job.max.split.locations") == null) {
        conf.setLong("mapreduce.job.max.split.locations", MAX_SPLIT_LOCATIONS);
    }
    if (conf.get("mapred.max.split.size") == null) {
        // Try to set the split size to the default block size. In case of
        // failure, we'll use this 128MB default.
        long blockSize = 128 * 1024 * 1024; // 128MB
        try {
            blockSize = FileSystem.get(conf).getDefaultBlockSize();
        } catch (IOException e) {
            LOG.error("Error getting filesystem to get get default block size (this does not bode well).");
        }
        conf.setLong("mapred.max.split.size", blockSize);
    }
    for (String key : new String[] { "mapreduce.job.max.split.locations", "mapred.max.split.size" }) {
        LOG.info("{} = {}", key, context.getConfiguration().get(key));
    }

    return super.getSplits(context);
}

From source file:com.rim.logdriver.sawmill.Sawmill.java

License:Apache License

public void run(String[] args) {
    if (args.length < 1) {
        System.out.println("Usage: " + this.getClass().getSimpleName() + " <config.properties>");
        System.exit(1);
    }

    LOG.info("Starting {}", Sawmill.class.getSimpleName());

    // First arg is the config
    String configFile = args[0];

    // Load configuration.
    Properties conf = new Properties();
    try {
        conf.load(new FileInputStream(configFile));
    } catch (FileNotFoundException e) {
        LOG.error("Config file not found.", e);
        System.exit(1);
    } catch (Throwable t) {
        LOG.error("Error reading config file.", t);
        System.exit(1);
    }

    // Parse the configuration.

    // Load in any Hadoop config files.
    Configuration hConf = new Configuration();
    {
        String[] hadoopConfs = Configs.hadoopConfigPaths.getArray(conf);
        for (String confPath : hadoopConfs) {
            hConf.addResource(new Path(confPath));
        }
        // Also, don't shut down my FileSystem automatically!!!
        hConf.setBoolean("fs.automatic.close", false);
        for (Entry<Object, Object> e : System.getProperties().entrySet()) {
            if (e.getValue() instanceof Integer) {
                hConf.setInt(e.getKey().toString(), (Integer) e.getValue());
            } else if (e.getValue() instanceof Long) {
                hConf.setLong(e.getKey().toString(), (Long) e.getValue());
            } else {
                hConf.set(e.getKey().toString(), e.getValue().toString());
            }
        }
    }

    // Ensure that UserGroupInformation is set up, and knows if security is
    // enabled.
    UserGroupInformation.setConfiguration(hConf);

    // Kerberos credentials. If these are not present, then it just won't try to
    // authenticate.
    String kerbConfPrincipal = Configs.kerberosPrincipal.get(conf);
    String kerbKeytab = Configs.kerberosKeytab.get(conf);
    Authenticator.getInstance().setKerbConfPrincipal(kerbConfPrincipal);
    Authenticator.getInstance().setKerbKeytab(kerbKeytab);

    // Check out the number of threads for workers, and create the threadpools
    // for both workers and stats updates.
    int threadCount = Configs.threadpoolSize.getInteger(conf);
    final ScheduledExecutorService executor = Executors.newScheduledThreadPool(threadCount);

    // Get the MBean server
    MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();

    // Set up the Mina Exception Monitor
    ExceptionMonitor.setInstance(new ExceptionLoggerExceptionMonitor());

    // For each port->output mapping, create a path (listener, queue, worker).
    // List<DataPath> paths = new ArrayList<DataPath>();
    final List<IoAcceptor> acceptors = new ArrayList<IoAcceptor>();
    final List<Writer> writers = new ArrayList<Writer>();
    {
        String[] pathStrings = Configs.paths.getArray(conf);
        for (String p : pathStrings) {
            Properties pathConf = Util.subProperties(conf, "path." + p);

            String name = Configs.name.get(pathConf);
            if (name == null) {
                LOG.info("Path has no name.  Using {}", p);
                name = p;
            }
            LOG.info("[{}] Configuring path {}", name, name);

            // Check the properties for this specific instance
            Integer maxLineLength = Configs.tcpMaxLineLength.getInteger(pathConf);
            if (maxLineLength == null) {
                maxLineLength = Configs.defaultTcpMaxLineLength.getInteger(conf);
            }
            LOG.info("[{}] Maximum line length is {}", name, maxLineLength);

            InetAddress bindAddress = null;
            try {
                String address = Configs.bindAddress.get(pathConf);
                bindAddress = InetAddress.getByName(address);
            } catch (UnknownHostException e) {
                LOG.error("[{}] Error getting bindAddress from string {}",
                        new Object[] { name, pathConf.getProperty("bindAddress") }, e);
            }

            Integer port = Configs.port.getInteger(pathConf);
            if (port == null) {
                LOG.error("[{}] Port not set.  Skipping this path.", name);
                continue;
            }

            int queueLength = Configs.queueCapacity.getInteger(pathConf);

            // Set up the actual processing chain
            IoAcceptor acceptor = new NioSocketAcceptor();
            SocketSessionConfig sessionConfig = (SocketSessionConfig) acceptor.getSessionConfig();
            sessionConfig.setReuseAddress(true);
            acceptors.add(acceptor);

            String charsetName = Configs.charset.getString(pathConf);
            Charset charset = null;
            try {
                charset = Charset.forName(charsetName);
            } catch (UnsupportedCharsetException e) {
                LOG.error("[{}] Charset '{}' is not supported.  Defaulting to UTF-8.", name, charsetName);
                charset = Charset.forName("UTF-8");
            }
            LOG.info("[{}] Using character set {}", name, charset.displayName());
            TextLineCodecFactory textLineCodecFactory = new TextLineCodecFactory(charset, LineDelimiter.UNIX,
                    LineDelimiter.AUTO);
            textLineCodecFactory.setDecoderMaxLineLength(maxLineLength);
            acceptor.getFilterChain().addLast("textLineCodec", new ProtocolCodecFilter(textLineCodecFactory));

            int numBuckets = Configs.outputBuckets.getInteger(pathConf);
            if (numBuckets > 1) {
                // Set up multiple writers for one MultiEnqueueHandler
                @SuppressWarnings("unchecked")
                BlockingQueue<String>[] queues = new BlockingQueue[numBuckets];

                for (int i = 0; i < numBuckets; i++) {
                    BlockingQueue<String> queue = new ArrayBlockingQueue<String>(queueLength);
                    queues[i] = queue;

                    // Set up the processor on the other end.
                    Writer writer = new Writer();
                    writer.setName(name);
                    writer.setConfig(pathConf);
                    writer.setHadoopConf(hConf);
                    writer.setQueue(queue);
                    writer.init();

                    // Set up MBean for the Writer
                    {
                        ObjectName mbeanName = null;
                        try {
                            mbeanName = new ObjectName(Writer.class.getPackage().getName() + ":type="
                                    + Writer.class.getSimpleName() + " [" + i + "]" + ",name=" + name);
                        } catch (MalformedObjectNameException e) {
                            LOG.error("[{}] Error creating MBean name.", name, e);
                        } catch (NullPointerException e) {
                            LOG.error("[{}] Error creating MBean name.", name, e);
                        }
                        try {
                            mbs.registerMBean(writer, mbeanName);
                        } catch (InstanceAlreadyExistsException e) {
                            LOG.error("[{}] Error registering MBean name.", name, e);
                        } catch (MBeanRegistrationException e) {
                            LOG.error("[{}] Error registering MBean name.", name, e);
                        } catch (NotCompliantMBeanException e) {
                            LOG.error("[{}] Error registering MBean name.", name, e);
                        }
                    }

                    executor.scheduleWithFixedDelay(writer, 0, 100, TimeUnit.MILLISECONDS);
                    writers.add(writer);
                }

                MultiEnqueueHandler handler = new MultiEnqueueHandler(queues);
                acceptor.setHandler(handler);

                // Set up MBean for the MultiEnqueueHandler
                {
                    ObjectName mbeanName = null;
                    try {
                        mbeanName = new ObjectName(MultiEnqueueHandler.class.getPackage().getName() + ":type="
                                + MultiEnqueueHandler.class.getSimpleName() + ",name=" + name);
                    } catch (MalformedObjectNameException e) {
                        LOG.error("[{}] Error creating MBean name.", name, e);
                    } catch (NullPointerException e) {
                        LOG.error("[{}] Error creating MBean name.", name, e);
                    }
                    try {
                        mbs.registerMBean(handler, mbeanName);
                    } catch (InstanceAlreadyExistsException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    } catch (MBeanRegistrationException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    } catch (NotCompliantMBeanException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    }
                }
            } else {
                BlockingQueue<String> queue = new ArrayBlockingQueue<String>(queueLength);

                // Set up the processor on the other end.
                Writer writer = new Writer();
                writer.setName(name);
                writer.setConfig(pathConf);
                writer.setHadoopConf(hConf);
                writer.setQueue(queue);
                writer.init();

                // Set up MBean for the Writer
                {
                    ObjectName mbeanName = null;
                    try {
                        mbeanName = new ObjectName(Writer.class.getPackage().getName() + ":type="
                                + Writer.class.getSimpleName() + ",name=" + name);
                    } catch (MalformedObjectNameException e) {
                        LOG.error("[{}] Error creating MBean name.", name, e);
                    } catch (NullPointerException e) {
                        LOG.error("[{}] Error creating MBean name.", name, e);
                    }
                    try {
                        mbs.registerMBean(writer, mbeanName);
                    } catch (InstanceAlreadyExistsException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    } catch (MBeanRegistrationException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    } catch (NotCompliantMBeanException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    }
                }

                executor.scheduleWithFixedDelay(writer, 0, 100, TimeUnit.MILLISECONDS);
                writers.add(writer);

                EnqueueHandler handler = new EnqueueHandler(queue);
                acceptor.setHandler(handler);

                // Set up MBean for the EnqueueHandler
                {
                    ObjectName mbeanName = null;
                    try {
                        mbeanName = new ObjectName(EnqueueHandler.class.getPackage().getName() + ":type="
                                + EnqueueHandler.class.getSimpleName() + ",name=" + name);
                    } catch (MalformedObjectNameException e) {
                        LOG.error("[{}] Error creating MBean name.", name, e);
                    } catch (NullPointerException e) {
                        LOG.error("[{}] Error creating MBean name.", name, e);
                    }
                    try {
                        mbs.registerMBean(handler, mbeanName);
                    } catch (InstanceAlreadyExistsException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    } catch (MBeanRegistrationException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    } catch (NotCompliantMBeanException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    }
                }
            }

            acceptor.getSessionConfig().setReadBufferSize(Configs.tcpReadBufferSize.getInteger(pathConf));
            acceptor.getSessionConfig().setIdleTime(IdleStatus.BOTH_IDLE, 5);

            while (true) {
                try {
                    acceptor.bind(new InetSocketAddress(bindAddress, port));
                } catch (IOException e) {
                    LOG.error("Error binding to {}:{}.  Retrying...", bindAddress, port);

                    try {
                        Thread.sleep(2000);
                    } catch (InterruptedException e1) {
                        // nothing
                    }

                    continue;
                }

                break;
            }

        }
    }

    // Register a shutdown hook..
    Runtime.getRuntime().addShutdownHook(new Thread() {
        public void run() {
            LOG.info("Shutting down");

            LOG.info("Unbinding and disposing of all IoAcceptors");
            for (IoAcceptor acceptor : acceptors) {
                acceptor.unbind();
                acceptor.dispose(true);
            }

            LOG.info("Shutting down worker threadpools.  This could take a little while.");
            executor.shutdown();
            try {
                executor.awaitTermination(10, TimeUnit.MINUTES);
            } catch (InterruptedException e) {
                LOG.error("Interrupted waiting for writer threadpool termination.", e);
            }
            if (!executor.isTerminated()) {
                LOG.error("Threadpool did not terminate cleanly.");
            }

            LOG.info("Cleaning out any remaining messages from the queues.");
            List<Thread> threads = new ArrayList<Thread>();
            for (final Writer writer : writers) {
                Runnable r = new Runnable() {
                    @Override
                    public void run() {
                        try {
                            writer.runAndClose();
                        } catch (Throwable t) {
                            LOG.error("Error shutting down writer [{}]", writer.getName(), t);
                        }
                    }
                };
                Thread t = new Thread(r);
                t.setDaemon(false);
                t.start();
                threads.add(t);
            }

            for (Thread t : threads) {
                try {
                    t.join();
                } catch (InterruptedException e) {
                    LOG.error("Interrupted waiting for thread to finish.");
                }
            }

            LOG.info("Closing filesystems.");
            try {
                FileSystem.closeAll();
            } catch (Throwable t) {
                LOG.error("Error closing filesystems.", t);
            }

            LOG.info("Finished shutting down cleanly.");
        }
    });
}

From source file:com.rim.logdriver.util.MultiSearch.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchStringDir = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchStringDirectory input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchStringDir = args[0];
    // We are going to be reading all the files in this directory a lot. So
    // let's up the replication factor by a lot so that they're easy to read.
    for (FileStatus f : fs.listStatus(new Path(searchStringDir))) {
        fs.setReplication(f.getPath(), (short) 16);
    }

    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }

    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(MultiSearch.class);
    jobConf.setIfUnset("mapred.job.name", "MultiSearch");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string.dir", searchStringDir);

    // This search is generally too fast to make good use of 128MB blocks, so
    // let's set the value to 256MB (if it's not set already)
    if (jobConf.get("mapred.max.split.size") == null) {
        jobConf.setLong("mapred.max.split.size", 256 * 1024 * 1024);
    }

    job.setInputFormatClass(AvroBlockInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        AvroBlockInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file:com.ruizhan.hadoop.hdfs.Trash.java

License:Apache License

/**
 * In case of symlinks or mount points, one has to move the path to the
 * appropriate trashbin in the actual volume of the path p being deleted.
 *
 * Hence we get the file system of the fully-qualified resolved-path and
 * then move the path p to the trashbin in that volume,
 * @param fs - the filesystem of path p
 * @param p - the path being deleted - to be moved to trash
 * @param conf - configuration
 * @return false if the item is already in the trash or trash is disabled
 * @throws IOException on error
 */
public static boolean moveToAppropriateTrash(FileSystem fs, Path p, Configuration conf) throws IOException {
    Path fullyResolvedPath = fs.resolvePath(p);
    FileSystem fullyResolvedFs = FileSystem.get(fullyResolvedPath.toUri(), conf);
    // If the trash interval is configured server side then clobber this
    // configuration so that we always respect the server configuration.
    try {
        long trashInterval = fullyResolvedFs.getServerDefaults(fullyResolvedPath).getTrashInterval();
        if (0 != trashInterval) {
            Configuration confCopy = new Configuration(conf);
            confCopy.setLong(CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY, trashInterval);
            conf = confCopy;
        }
    } catch (Exception e) {
        // If we can not determine that trash is enabled server side then
        // bail rather than potentially deleting a file when trash is enabled.
        throw new IOException("Failed to get server trash configuration", e);
    }
    Trash trash = new Trash(fullyResolvedFs, conf);
    boolean success = trash.moveToTrash(fullyResolvedPath);
    if (success) {
        System.out.println("Moved: '" + p + "' to trash at: " + trash.getCurrentTrashDir());
    }
    return success;
}

From source file:com.sa.npopa.samples.hbase.RowCounter.java

License:Apache License

/**
 * Sets up the actual job.
 *
 * @param conf  The current configuration.
 * @param args  The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    String startKey = null;
    String endKey = null;
    long startTime = 0;
    long endTime = 0;

    StringBuilder sb = new StringBuilder();

    final String rangeSwitch = "--range=";
    final String startTimeArgKey = "--starttime=";
    final String endTimeArgKey = "--endtime=";
    final String expectedCountArg = "--expected-count=";

    // First argument is table name, starting from second
    for (int i = 1; i < args.length; i++) {
        if (args[i].startsWith(rangeSwitch)) {
            String[] startEnd = args[i].substring(rangeSwitch.length()).split(",", 2);
            if (startEnd.length != 2 || startEnd[1].contains(",")) {
                printUsage("Please specify range in such format as \"--range=a,b\" "
                        + "or, with only one boundary, \"--range=,b\" or \"--range=a,\"");
                return null;
            }
            startKey = startEnd[0];
            endKey = startEnd[1];
            continue;
        }
        if (args[i].startsWith(startTimeArgKey)) {
            startTime = Long.parseLong(args[i].substring(startTimeArgKey.length()));
            continue;
        }
        if (args[i].startsWith(endTimeArgKey)) {
            endTime = Long.parseLong(args[i].substring(endTimeArgKey.length()));
            continue;
        }
        if (args[i].startsWith(expectedCountArg)) {
            conf.setLong(EXPECTED_COUNT_KEY, Long.parseLong(args[i].substring(expectedCountArg.length())));
            continue;
        }
        // if no switch, assume column names
        sb.append(args[i]);
        sb.append(" ");
    }
    if (endTime < startTime) {
        printUsage("--endtime=" + endTime + " needs to be greater than --starttime=" + startTime);
        return null;
    }

    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
    job.setJarByClass(RowCounter.class);
    Scan scan = new Scan();
    scan.setCacheBlocks(false);
    if (startKey != null && !startKey.equals("")) {
        scan.setStartRow(Bytes.toBytes(startKey));
    }
    if (endKey != null && !endKey.equals("")) {
        scan.setStopRow(Bytes.toBytes(endKey));
    }
    if (sb.length() > 0) {
        for (String columnName : sb.toString().trim().split(" ")) {
            String family = StringUtils.substringBefore(columnName, ":");
            String qualifier = StringUtils.substringAfter(columnName, ":");

            if (StringUtils.isBlank(qualifier)) {
                scan.addFamily(Bytes.toBytes(family));
            } else {
                scan.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier));
            }
        }
    }
    scan.setFilter(new FirstKeyOnlyFilter());
    scan.setTimeRange(startTime, endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
    job.setOutputFormatClass(NullOutputFormat.class);
    TableMapReduceUtil.initTableMapperJob(tableName, scan, RowCounterMapper.class, ImmutableBytesWritable.class,
            Result.class, job);
    job.setNumReduceTasks(0);
    return job;
}

From source file:com.skp.experiment.cf.als.hadoop.DistributedParallelALSFactorizationJob.java

License:Apache License

private void runDistributedImplicitSolver(Path ratings, Path output, Path pathToUorI, Path pathToTranspose,
        int rowNums) throws IOException, InterruptedException, ClassNotFoundException {
    @SuppressWarnings("rawtypes")
    Class<? extends Mapper> solverMapper = DistributedSolveImplicitFeedbackMapper.class;
    Job solverForUorI = prepareJob(ratings, output, SequenceFileInputFormat.class, solverMapper,
            IntWritable.class, VectorWritable.class, SequenceFileOutputFormat.class);

    Configuration solverConf = solverForUorI.getConfiguration();

    solverConf.setLong("mapred.min.split.size", dfsBlockSize);
    solverConf.setLong("mapred.max.split.size", dfsBlockSize);
    solverConf.setBoolean("mapred.map.tasks.speculative.execution", false);
    solverConf.setInt("mapred.map.tasks", LARGE_MATRIX_MAP_TASKS_NUM);
    solverConf.setLong("mapred.task.timeout", 600000 * 5);
    solverConf.setInt("mapred.job.reuse.jvm.num.tasks", -1);
    solverConf.set("mapred.child.java.opts", SMALL_MATRIX_MEMORY);

    solverConf.set(LAMBDA, String.valueOf(lambda));
    solverConf.set(ALPHA, String.valueOf(alpha));
    solverConf.setInt(NUM_FEATURES, numFeatures);
    solverConf.set(FEATURE_MATRIX, pathToUorI.toString());
    solverConf.set(FEATURE_MATRIX_TRANSPOSE, pathToTranspose.toString());
    solverConf.setInt("rowNums", rowNums);
    solverConf.setBoolean("mapred.compress.map.output", true);
    solverConf.set("mapred.map.output.compression.codec", LZO_CODEC_CLASS);
    solverForUorI.waitForCompletion(true);
}

From source file:com.skp.experiment.cf.als.hadoop.ParallelALSFactorizationJob.java

License:Apache License

private void runSolver(Path ratings, Path output, Path pathToUorI, Path pathToTranspose, int numRows,
        boolean largeMatrix) throws ClassNotFoundException, IOException, InterruptedException {
    @SuppressWarnings("rawtypes")
    Class<? extends Mapper> solverMapper = implicitFeedback ? SolveImplicitFeedbackMultithreadedMapper.class
            : SolveExplicitFeedbackMapper.class;

    Job solverForUorI = prepareJob(ratings, output, SequenceFileInputFormat.class, solverMapper,
            IntWritable.class, VectorWritable.class, SequenceFileOutputFormat.class);

    Configuration solverConf = solverForUorI.getConfiguration();

    long matrixSizeExp = (long) (8L * numRows * numFeatures * SAFE_MARGIN);
    long memoryThreshold = HadoopClusterUtil.PHYSICAL_MEMERY_LIMIT / HadoopClusterUtil.MAP_TASKS_PER_NODE;
    int numTaskPerDataNode = Math.max(1, (int) (HadoopClusterUtil.PHYSICAL_MEMERY_LIMIT / matrixSizeExp));

    if (matrixSizeExp > memoryThreshold) {
        solverConf.set("mapred.child.java.opts", "-Xmx8g");
        solverConf.set("mapred.map.child.java.opts", "-Xmx8g");
        solverConf.setLong("dfs.block.size", HadoopClusterUtil.getMaxBlockSize(getConf(), pathToTransformed()));
        solverConf.setInt("mapred.map.tasks", HadoopClusterUtil.getNumberOfTaskTrackers(getConf()));
        solverConf.setLong("mapred.min.split.size",
                HadoopClusterUtil.getMaxBlockSize(getConf(), pathToTransformed()));
        solverConf.setLong("mapred.max.split.size",
                HadoopClusterUtil.getMaxBlockSize(getConf(), pathToTransformed()));
        solverConf.set(SolveImplicitFeedbackMultithreadedMapper.LOCK_FILE, pathToHostLocks().toString());
        solverConf.setInt(SolveImplicitFeedbackMultithreadedMapper.LOCK_FILE_NUMS,
                Math.min(HadoopClusterUtil.MAP_TASKS_PER_NODE, numTaskPerDataNode));
    } else {
        solverConf.setLong("mapred.min.split.size",
                HadoopClusterUtil.getMinInputSplitSizeMax(getConf(), ratings));
        solverConf.setLong("mapred.max.split.size",
                HadoopClusterUtil.getMinInputSplitSizeMax(getConf(), ratings));
        solverConf.setInt("mapred.map.tasks",
                HadoopClusterUtil.getNumberOfTaskTrackers(getConf()) * multiplyMapTasks);
        //solverConf.setBoolean("mapred.map.tasks.speculative.execution", false);
    }
    solverConf.setLong("mapred.task.timeout", taskTimeout);
    solverConf.setBoolean("mapred.map.tasks.speculative.execution", false);

    solverConf.set(SolveImplicitFeedbackMultithreadedMapper.LAMBDA, String.valueOf(lambda));
    solverConf.set(SolveImplicitFeedbackMultithreadedMapper.ALPHA, String.valueOf(alpha));
    solverConf.setInt(SolveImplicitFeedbackMultithreadedMapper.NUM_FEATURES, numFeatures);
    solverConf.setInt(SolveImplicitFeedbackMultithreadedMapper.NUM_ROWS, numRows);
    solverConf.set(SolveImplicitFeedbackMultithreadedMapper.FEATURE_MATRIX, pathToUorI.toString());
    solverConf.set(SolveImplicitFeedbackMultithreadedMapper.FEATURE_MATRIX_TRANSPOSE,
            pathToTranspose.toString());

    solverForUorI.waitForCompletion(true);
}

From source file:com.splicemachine.orc.OrcConf.java

License:Open Source License

public static void setLongVar(Configuration conf, OrcConf.ConfVars var, long val) {
    conf.setLong(var.varname, val);
}