Example usage for org.apache.hadoop.conf Configuration setBoolean

Introduction

This page presents example usages of org.apache.hadoop.conf.Configuration.setBoolean, drawn from open-source projects.

Prototype

public void setBoolean(String name, boolean value) 

Document

Set the value of the name property to a boolean.
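
Before the project examples below, here is a minimal, self-contained sketch of the call paired with its getBoolean counterpart. The property name "demo.feature.enabled" is hypothetical, chosen only for illustration:

import org.apache.hadoop.conf.Configuration;

public class SetBooleanExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Store a boolean under the (hypothetical) property name.
        conf.setBoolean("demo.feature.enabled", true);
        // Read it back; the second argument is the default returned when the property is unset.
        boolean enabled = conf.getBoolean("demo.feature.enabled", false);
        System.out.println("demo.feature.enabled = " + enabled);
    }
}

Because setBoolean stores the value as the string "true" or "false", the same property can also be supplied through an XML resource file or a -D command-line override.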

Usage

From source file:com.placeiq.piqconnect.BlocksBuilder.java

License:Apache License

protected Job configStage1() throws Exception {
    FileSystem fs = FileSystem.get(getConf());
    fs.delete(pathOutput, true); // useful ?

    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);
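    // setBoolean: record whether the input blocks represent a vector.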
    conf.setBoolean(Constants.PROP_IS_VECTOR, isVector);
    conf.set("mapred.output.compression.type", "BLOCK"); // useful ?

    Job job = new Job(conf, "data-piqid.piqconnect.BlocksBuilder");
    job.setJarByClass(BlocksBuilder.class);
    job.setMapperClass(MapStage1.class);
    job.setReducerClass(RedStage1.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setMapOutputKeyClass(BlockIndexWritable.class);
    job.setMapOutputValueClass(LightBlockWritable.class);
    job.setOutputKeyClass(BlockIndexWritable.class);
    job.setOutputValueClass(BlockWritable.class);

    FileInputFormat.setInputPaths(job, pathEdges);
    SequenceFileOutputFormat.setOutputPath(job, pathOutput);
    SequenceFileOutputFormat.setCompressOutput(job, true);

    Runner.setCompression(job);

    return job;
}

From source file:com.pocketx.gravity.recommender.cf.similarity.job.RowSimilarityJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    addInputOption();
    addOutputOption();
    addOption("numberOfColumns", "r", "Number of columns in the input matrix", false);
    addOption("similarityClassname", "s",
            "Name of distributed similarity class to instantiate, alternatively use "
                    + "one of the predefined similarities (" + VectorSimilarityMeasures.list() + ')');
    addOption("maxSimilaritiesPerRow", "m",
            "Number of maximum similarities per row (default: " + DEFAULT_MAX_SIMILARITIES_PER_ROW + ')',
            String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ROW));
    addOption("excludeSelfSimilarity", "ess", "compute similarity of rows to themselves?",
            String.valueOf(false));
    addOption("threshold", "tr", "discard row pairs with a similarity value below this", false);
    addOption(DefaultOptionCreator.overwriteOption().create());

    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    int numberOfColumns;

    if (hasOption("numberOfColumns")) {
        // Number of columns explicitly specified via CLI
        numberOfColumns = Integer.parseInt(getOption("numberOfColumns"));
    } else {
        // else get the number of columns by determining the cardinality of a vector in the input matrix
        numberOfColumns = getDimensions(getInputPath());
    }

    String similarityClassnameArg = getOption("similarityClassname");
    String similarityClassname;
    try {
        similarityClassname = VectorSimilarityMeasures.valueOf(similarityClassnameArg).getClassname();
    } catch (IllegalArgumentException iae) {
        similarityClassname = similarityClassnameArg;
    }

    // Clear the output and temp paths if the overwrite option has been set
    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        // Clear the temp path
        HadoopUtil.delete(getConf(), getTempPath());
        // Clear the output path
        HadoopUtil.delete(getConf(), getOutputPath());
    }

    int maxSimilaritiesPerRow = Integer.parseInt(getOption("maxSimilaritiesPerRow"));
    boolean excludeSelfSimilarity = Boolean.parseBoolean(getOption("excludeSelfSimilarity"));
    double threshold = hasOption("threshold") ? Double.parseDouble(getOption("threshold")) : NO_THRESHOLD;

    Path weightsPath = getTempPath("weights");
    Path normsPath = getTempPath("norms.bin");
    Path numNonZeroEntriesPath = getTempPath("numNonZeroEntries.bin");
    Path maxValuesPath = getTempPath("maxValues.bin");
    Path pairwiseSimilarityPath = getTempPath("pairwiseSimilarity");

    AtomicInteger currentPhase = new AtomicInteger();

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job normsAndTranspose = prepareJob(getInputPath(), weightsPath, VectorNormMapper.class,
                IntWritable.class, VectorWritable.class, MergeVectorsReducer.class, IntWritable.class,
                VectorWritable.class);
        normsAndTranspose.setCombinerClass(MergeVectorsCombiner.class);
        Configuration normsAndTransposeConf = normsAndTranspose.getConfiguration();
        normsAndTransposeConf.set(THRESHOLD, String.valueOf(threshold));
        normsAndTransposeConf.set(NORMS_PATH, normsPath.toString());
        normsAndTransposeConf.set(NUM_NON_ZERO_ENTRIES_PATH, numNonZeroEntriesPath.toString());
        normsAndTransposeConf.set(MAXVALUES_PATH, maxValuesPath.toString());
        normsAndTransposeConf.set(SIMILARITY_CLASSNAME, similarityClassname);
        boolean succeeded = normsAndTranspose.waitForCompletion(true);
        if (!succeeded) {
            return -1;
        }
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job pairwiseSimilarity = prepareJob(weightsPath, pairwiseSimilarityPath, CooccurrencesMapper.class,
                IntWritable.class, VectorWritable.class, SimilarityReducer.class, IntWritable.class,
                VectorWritable.class);
        pairwiseSimilarity.setCombinerClass(VectorSumReducer.class);
        Configuration pairwiseConf = pairwiseSimilarity.getConfiguration();
        pairwiseConf.set(THRESHOLD, String.valueOf(threshold));
        pairwiseConf.set(NORMS_PATH, normsPath.toString());
        pairwiseConf.set(NUM_NON_ZERO_ENTRIES_PATH, numNonZeroEntriesPath.toString());
        pairwiseConf.set(MAXVALUES_PATH, maxValuesPath.toString());
        pairwiseConf.set(SIMILARITY_CLASSNAME, similarityClassname);
        pairwiseConf.setInt(NUMBER_OF_COLUMNS, numberOfColumns);
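        // setBoolean: tell the pairwise-similarity job whether to exclude each row's similarity to itself.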
        pairwiseConf.setBoolean(EXCLUDE_SELF_SIMILARITY, excludeSelfSimilarity);
        boolean succeeded = pairwiseSimilarity.waitForCompletion(true);
        if (!succeeded) {
            return -1;
        }
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job asMatrix = prepareJob(pairwiseSimilarityPath, getOutputPath(), UnsymmetrifyMapper.class,
                IntWritable.class, VectorWritable.class, MergeToTopKSimilaritiesReducer.class,
                IntWritable.class, VectorWritable.class);
        asMatrix.setCombinerClass(MergeToTopKSimilaritiesReducer.class);
        asMatrix.getConfiguration().setInt(MAX_SIMILARITIES_PER_ROW, maxSimilaritiesPerRow);
        boolean succeeded = asMatrix.waitForCompletion(true);
        if (!succeeded) {
            return -1;
        }
    }

    return 0;
}

From source file:com.qubole.rubix.core.TestCachingInputStream.java

License:Apache License

@BeforeMethod
public void setup() throws IOException, InterruptedException {
    final Configuration conf = new Configuration();

    conf.setBoolean(CacheConfig.DATA_CACHE_STRICT_MODE, true);
    conf.setInt(CacheConfig.dataCacheBookkeeperPortConf, 3456);
    Thread thread = new Thread() {
        public void run() {
            BookKeeperServer.startServer(conf);
        }
    };
    thread.start();

    DataGen.populateFile(backendFileName);

    while (!BookKeeperServer.isServerUp()) {
        Thread.sleep(200);
        log.info("Waiting for BookKeeper Server to come up");
    }

    createCachingStream(conf);
}

From source file:com.qubole.rubix.core.TestCachingInputStream.java

License:Apache License

public void createCachingStream(Configuration conf) throws InterruptedException, IOException {
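    // setBoolean: enforce strict data-cache mode before building the caching stream.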
    conf.setBoolean(CacheConfig.DATA_CACHE_STRICT_MODE, true);
    conf.setInt(CacheConfig.dataCacheBookkeeperPortConf, 3456);

    File file = new File(backendFileName);

    LocalFSInputStream localFSInputStream = new LocalFSInputStream(backendFileName);
    FSDataInputStream fsDataInputStream = new FSDataInputStream(localFSInputStream);
    conf.setInt(CacheConfig.blockSizeConf, blockSize);
    log.info("All set to test");

    // This must run after the server is up; otherwise the client cannot be created.
    inputStream = new CachingInputStream(fsDataInputStream, conf, backendPath, file.length(),
            file.lastModified(), new CachingFileSystemStats(), 64 * 1024 * 1024,
            ClusterType.TEST_CLUSTER_MANAGER);
}

From source file:com.rim.logdriver.sawmill.Sawmill.java

License:Apache License

public void run(String[] args) {
    if (args.length < 1) {
        System.out.println("Usage: " + this.getClass().getSimpleName() + " <config.properties>");
        System.exit(1);
    }

    LOG.info("Starting {}", Sawmill.class.getSimpleName());

    // First arg is the config
    String configFile = args[0];

    // Load configuration.
    Properties conf = new Properties();
    try {
        conf.load(new FileInputStream(configFile));
    } catch (FileNotFoundException e) {
        LOG.error("Config file not found.", e);
        System.exit(1);
    } catch (Throwable t) {
        LOG.error("Error reading config file.", t);
        System.exit(1);
    }

    // Parse the configuration.

    // Load in any Hadoop config files.
    Configuration hConf = new Configuration();
    {
        String[] hadoopConfs = Configs.hadoopConfigPaths.getArray(conf);
        for (String confPath : hadoopConfs) {
            hConf.addResource(new Path(confPath));
        }
        // Also, don't shut down my FileSystem automatically!!!
        hConf.setBoolean("fs.automatic.close", false);
        for (Entry<Object, Object> e : System.getProperties().entrySet()) {
            if (e.getValue() instanceof Integer) {
                hConf.setInt(e.getKey().toString(), (Integer) e.getValue());
            } else if (e.getValue() instanceof Long) {
                hConf.setLong(e.getKey().toString(), (Long) e.getValue());
            } else {
                hConf.set(e.getKey().toString(), e.getValue().toString());
            }
        }
    }

    // Ensure that UserGroupInformation is set up, and knows if security is
    // enabled.
    UserGroupInformation.setConfiguration(hConf);

    // Kerberos credentials. If these are not present, then it just won't try to
    // authenticate.
    String kerbConfPrincipal = Configs.kerberosPrincipal.get(conf);
    String kerbKeytab = Configs.kerberosKeytab.get(conf);
    Authenticator.getInstance().setKerbConfPrincipal(kerbConfPrincipal);
    Authenticator.getInstance().setKerbKeytab(kerbKeytab);

    // Read the number of worker threads, and create the thread pools
    // for both workers and stats updates.
    int threadCount = Configs.threadpoolSize.getInteger(conf);
    final ScheduledExecutorService executor = Executors.newScheduledThreadPool(threadCount);

    // Get the MBean server
    MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();

    // Set up the Mina Exception Monitor
    ExceptionMonitor.setInstance(new ExceptionLoggerExceptionMonitor());

    // For each port->output mapping, create a path (listener, queue, worker).
    // List<DataPath> paths = new ArrayList<DataPath>();
    final List<IoAcceptor> acceptors = new ArrayList<IoAcceptor>();
    final List<Writer> writers = new ArrayList<Writer>();
    {
        String[] pathStrings = Configs.paths.getArray(conf);
        for (String p : pathStrings) {
            Properties pathConf = Util.subProperties(conf, "path." + p);

            String name = Configs.name.get(pathConf);
            if (name == null) {
                LOG.info("Path has no name.  Using {}", p);
                name = p;
            }
            LOG.info("[{}] Configuring path {}", name, name);

            // Check the properties for this specific instance
            Integer maxLineLength = Configs.tcpMaxLineLength.getInteger(pathConf);
            if (maxLineLength == null) {
                maxLineLength = Configs.defaultTcpMaxLineLength.getInteger(conf);
            }
            LOG.info("[{}] Maximum line length is {}", name, maxLineLength);

            InetAddress bindAddress = null;
            try {
                String address = Configs.bindAddress.get(pathConf);
                bindAddress = InetAddress.getByName(address);
            } catch (UnknownHostException e) {
                LOG.error("[{}] Error getting bindAddress from string {}",
                        new Object[] { name, pathConf.getProperty("bindAddress") }, e);
            }

            Integer port = Configs.port.getInteger(pathConf);
            if (port == null) {
                LOG.error("[{}] Port not set.  Skipping this path.", name);
                continue;
            }

            int queueLength = Configs.queueCapacity.getInteger(pathConf);

            // Set up the actual processing chain
            IoAcceptor acceptor = new NioSocketAcceptor();
            SocketSessionConfig sessionConfig = (SocketSessionConfig) acceptor.getSessionConfig();
            sessionConfig.setReuseAddress(true);
            acceptors.add(acceptor);

            String charsetName = Configs.charset.getString(pathConf);
            Charset charset = null;
            try {
                charset = Charset.forName(charsetName);
            } catch (UnsupportedCharsetException e) {
                LOG.error("[{}] Charset '{}' is not supported.  Defaulting to UTF-8.", name, charsetName);
                charset = Charset.forName("UTF-8");
            }
            LOG.info("[{}] Using character set {}", name, charset.displayName());
            TextLineCodecFactory textLineCodecFactory = new TextLineCodecFactory(charset, LineDelimiter.UNIX,
                    LineDelimiter.AUTO);
            textLineCodecFactory.setDecoderMaxLineLength(maxLineLength);
            acceptor.getFilterChain().addLast("textLineCodec", new ProtocolCodecFilter(textLineCodecFactory));

            int numBuckets = Configs.outputBuckets.getInteger(pathConf);
            if (numBuckets > 1) {
                // Set up multiple writers for one MultiEnqueueHandler
                @SuppressWarnings("unchecked")
                BlockingQueue<String>[] queues = new BlockingQueue[numBuckets];

                for (int i = 0; i < numBuckets; i++) {
                    BlockingQueue<String> queue = new ArrayBlockingQueue<String>(queueLength);
                    queues[i] = queue;

                    // Set up the processor on the other end.
                    Writer writer = new Writer();
                    writer.setName(name);
                    writer.setConfig(pathConf);
                    writer.setHadoopConf(hConf);
                    writer.setQueue(queue);
                    writer.init();

                    // Set up MBean for the Writer
                    {
                        ObjectName mbeanName = null;
                        try {
                            mbeanName = new ObjectName(Writer.class.getPackage().getName() + ":type="
                                    + Writer.class.getSimpleName() + " [" + i + "]" + ",name=" + name);
                        } catch (MalformedObjectNameException e) {
                            LOG.error("[{}] Error creating MBean name.", name, e);
                        } catch (NullPointerException e) {
                            LOG.error("[{}] Error creating MBean name.", name, e);
                        }
                        try {
                            mbs.registerMBean(writer, mbeanName);
                        } catch (InstanceAlreadyExistsException e) {
                            LOG.error("[{}] Error registering MBean name.", name, e);
                        } catch (MBeanRegistrationException e) {
                            LOG.error("[{}] Error registering MBean name.", name, e);
                        } catch (NotCompliantMBeanException e) {
                            LOG.error("[{}] Error registering MBean name.", name, e);
                        }
                    }

                    executor.scheduleWithFixedDelay(writer, 0, 100, TimeUnit.MILLISECONDS);
                    writers.add(writer);
                }

                MultiEnqueueHandler handler = new MultiEnqueueHandler(queues);
                acceptor.setHandler(handler);

                // Set up MBean for the MultiEnqueueHandler
                {
                    ObjectName mbeanName = null;
                    try {
                        mbeanName = new ObjectName(MultiEnqueueHandler.class.getPackage().getName() + ":type="
                                + MultiEnqueueHandler.class.getSimpleName() + ",name=" + name);
                    } catch (MalformedObjectNameException e) {
                        LOG.error("[{}] Error creating MBean name.", name, e);
                    } catch (NullPointerException e) {
                        LOG.error("[{}] Error creating MBean name.", name, e);
                    }
                    try {
                        mbs.registerMBean(handler, mbeanName);
                    } catch (InstanceAlreadyExistsException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    } catch (MBeanRegistrationException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    } catch (NotCompliantMBeanException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    }
                }
            } else {
                BlockingQueue<String> queue = new ArrayBlockingQueue<String>(queueLength);

                // Set up the processor on the other end.
                Writer writer = new Writer();
                writer.setName(name);
                writer.setConfig(pathConf);
                writer.setHadoopConf(hConf);
                writer.setQueue(queue);
                writer.init();

                // Set up MBean for the Writer
                {
                    ObjectName mbeanName = null;
                    try {
                        mbeanName = new ObjectName(Writer.class.getPackage().getName() + ":type="
                                + Writer.class.getSimpleName() + ",name=" + name);
                    } catch (MalformedObjectNameException e) {
                        LOG.error("[{}] Error creating MBean name.", name, e);
                    } catch (NullPointerException e) {
                        LOG.error("[{}] Error creating MBean name.", name, e);
                    }
                    try {
                        mbs.registerMBean(writer, mbeanName);
                    } catch (InstanceAlreadyExistsException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    } catch (MBeanRegistrationException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    } catch (NotCompliantMBeanException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    }
                }

                executor.scheduleWithFixedDelay(writer, 0, 100, TimeUnit.MILLISECONDS);
                writers.add(writer);

                EnqueueHandler handler = new EnqueueHandler(queue);
                acceptor.setHandler(handler);

                // Set up MBean for the EnqueueHandler
                {
                    ObjectName mbeanName = null;
                    try {
                        mbeanName = new ObjectName(EnqueueHandler.class.getPackage().getName() + ":type="
                                + EnqueueHandler.class.getSimpleName() + ",name=" + name);
                    } catch (MalformedObjectNameException e) {
                        LOG.error("[{}] Error creating MBean name.", name, e);
                    } catch (NullPointerException e) {
                        LOG.error("[{}] Error creating MBean name.", name, e);
                    }
                    try {
                        mbs.registerMBean(handler, mbeanName);
                    } catch (InstanceAlreadyExistsException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    } catch (MBeanRegistrationException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    } catch (NotCompliantMBeanException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    }
                }
            }

            acceptor.getSessionConfig().setReadBufferSize(Configs.tcpReadBufferSize.getInteger(pathConf));
            acceptor.getSessionConfig().setIdleTime(IdleStatus.BOTH_IDLE, 5);

            while (true) {
                try {
                    acceptor.bind(new InetSocketAddress(bindAddress, port));
                } catch (IOException e) {
                    LOG.error("Error binding to {}:{}.  Retrying...", bindAddress, port);

                    try {
                        Thread.sleep(2000);
                    } catch (InterruptedException e1) {
                        // nothing
                    }

                    continue;
                }

                break;
            }

        }
    }

    // Register a shutdown hook..
    Runtime.getRuntime().addShutdownHook(new Thread() {
        public void run() {
            LOG.info("Shutting down");

            LOG.info("Unbinding and disposing of all IoAcceptors");
            for (IoAcceptor acceptor : acceptors) {
                acceptor.unbind();
                acceptor.dispose(true);
            }

            LOG.info("Shutting down worker threadpools.  This could take a little while.");
            executor.shutdown();
            try {
                executor.awaitTermination(10, TimeUnit.MINUTES);
            } catch (InterruptedException e) {
                LOG.error("Interrupted waiting for writer threadpool termination.", e);
            }
            if (!executor.isTerminated()) {
                LOG.error("Threadpool did not terminate cleanly.");
            }

            LOG.info("Cleaning out any remaining messages from the queues.");
            List<Thread> threads = new ArrayList<Thread>();
            for (final Writer writer : writers) {
                Runnable r = new Runnable() {
                    @Override
                    public void run() {
                        try {
                            writer.runAndClose();
                        } catch (Throwable t) {
                            LOG.error("Error shutting down writer [{}]", writer.getName(), t);
                        }
                    }
                };
                Thread t = new Thread(r);
                t.setDaemon(false);
                t.start();
                threads.add(t);
            }

            for (Thread t : threads) {
                try {
                    t.join();
                } catch (InterruptedException e) {
                    LOG.error("Interrupted waiting for thread to finish.");
                }
            }

            LOG.info("Closing filesystems.");
            try {
                FileSystem.closeAll();
            } catch (Throwable t) {
                LOG.error("Error closing filesystems.", t);
            }

            LOG.info("Finished shutting down cleanly.");
        }
    });
}

From source file:com.savy3.nonequijoin.MapOutputSampler.java

License:Apache License

/**
 * Driver for InputSampler MapReduce Job
 */
public static void runMap(Job job, Path sampleInputPath)
        throws IOException, IllegalStateException, ClassNotFoundException, InterruptedException {
    LOG.info("Running a MapReduce job on sample input file " + sampleInputPath.toString());

    Configuration conf = new Configuration();
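    // setBoolean: enable uber-task mode so this small sampling job runs in a single JVM.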
    conf.setBoolean("mapreduce.job.ubertask.enable", true);
    conf.set("numSamples", "" + (job.getNumReduceTasks() - 1));
    Job sampleJob = new Job(conf);
    sampleJob.setMapperClass(job.getMapperClass());
    sampleJob.setReducerClass(SampleKeyReducer.class);
    sampleJob.setJarByClass(job.getMapperClass());
    sampleJob.setMapOutputKeyClass(job.getMapOutputKeyClass());
    sampleJob.setMapOutputValueClass(job.getMapOutputValueClass());
    sampleJob.setOutputKeyClass(job.getMapOutputKeyClass());
    sampleJob.setOutputValueClass(NullWritable.class);
    sampleJob.setInputFormatClass(SequenceFileInputFormat.class);
    sampleJob.setOutputFormatClass(SequenceFileOutputFormat.class);

    SequenceFileInputFormat.addInputPath(sampleJob, sampleInputPath);
    FileSystem fs = FileSystem.get(conf);

    Path out = new Path(sampleInputPath.getParent(), "mapOut");
    fs.delete(out, true);

    SequenceFileOutputFormat.setOutputPath(sampleJob, out);

    sampleJob.waitForCompletion(true);

    LOG.info("Sample MapReduce job output file: " + out.toString());

    Path partFile = new Path(out, "part-r-00000");
    Path tmpFile = new Path("/_tmp");
    fs.delete(tmpFile, true);
    fs.rename(partFile, tmpFile);
    fs.delete(sampleInputPath.getParent(), true);
    fs.rename(new Path("/_tmp"), sampleInputPath.getParent());

    LOG.info("Sample partitioning file copied to location " + sampleInputPath.getParent().toString());
}

From source file:com.scaleoutsoftware.soss.hserver.GridOutputFormat.java

License:Apache License

/**
 * Sets the {@link NamedMap} to direct output to.
 *
 * @param job job to modify
 * @param map named map to be used for output
 */
public static void setNamedMap(Job job, NamedMap map) {
    Configuration configuration = job.getConfiguration();
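    // setBoolean: mark the job's output as being directed to a named map.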
    configuration.setBoolean(outputIsNamedMapProperty, true);
    configuration.setStrings(outputNamedMapProperty, map.getMapName());
    CustomSerializer keySerializer = map.getKeySerializer();
    CustomSerializer valueSerializer = map.getValueSerializer();
    SerializationMode serializationMode = map.getSerializationMode();
    AvailabilityMode availabilityMode = map.getAvailabilityMode();
    configuration.setInt(SERIALIZATION_MODE, serializationMode.ordinal());
    configuration.setInt(AVAILABILITY_MODE, availabilityMode.ordinal());
    configuration.setClass(outputNamedMapKeySerializerProperty, keySerializer.getClass(), Object.class);
    configuration.setClass(outputNamedMapValueSerializerProperty, valueSerializer.getClass(), Object.class);
    if (keySerializer.getObjectClass() != null) {
        configuration.setClass(outputNamedMapKeyProperty, keySerializer.getObjectClass(), Object.class);
    }
    if (valueSerializer.getObjectClass() != null) {
        configuration.setClass(outputNamedMapValueProperty, valueSerializer.getObjectClass(), Object.class);
    }
}

From source file:com.skp.experiment.cf.als.hadoop.DistributedParallelALSFactorizationJob.java

License:Apache License

private void runSolver(Path ratings, Path output, Path pathToUorI, Path pathToTranspose, int rowNums)
        throws ClassNotFoundException, IOException, InterruptedException {

    @SuppressWarnings("rawtypes")
    Class<? extends Mapper> solverMapper = null;
    if (implicitFeedback) {
        solverMapper = SolveImplicitFeedbackMapper.class;
    } else {
        solverMapper = SolveExplicitFeedbackMapper.class;
    }

    Job solverForUorI = prepareJob(ratings, output, SequenceFileInputFormat.class, solverMapper,
            IntWritable.class, VectorWritable.class, SequenceFileOutputFormat.class);

    Configuration solverConf = solverForUorI.getConfiguration();
    solverConf.set(LAMBDA, String.valueOf(lambda));
    solverConf.set(ALPHA, String.valueOf(alpha));
    solverConf.setInt(NUM_FEATURES, numFeatures);
    solverConf.set(FEATURE_MATRIX, pathToUorI.toString());
    solverConf.set(FEATURE_MATRIX_TRANSPOSE, pathToTranspose.toString());
    solverConf.setInt("rowNums", rowNums);
    solverConf.set("mapred.child.java.opts", SMALL_MATRIX_MEMORY);
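    // setBoolean: disable speculative map tasks and enable map-output compression below.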
    solverConf.setBoolean("mapred.map.tasks.speculative.execution", false);
    solverConf.setInt("mapred.job.reuse.jvm.num.tasks", -1);
    solverConf.setBoolean("mapred.compress.map.output", true);
    solverConf.set("mapred.map.output.compression.codec", LZO_CODEC_CLASS);
    solverForUorI.waitForCompletion(true);
}

From source file:com.skp.experiment.cf.als.hadoop.DistributedParallelALSFactorizationJob.java

License:Apache License

private void runDistributedImplicitSolver(Path ratings, Path output, Path pathToUorI, Path pathToTranspose,
        int rowNums) throws IOException, InterruptedException, ClassNotFoundException {
    @SuppressWarnings("rawtypes")
    Class<? extends Mapper> solverMapper = DistributedSolveImplicitFeedbackMapper.class;
    Job solverForUorI = prepareJob(ratings, output, SequenceFileInputFormat.class, solverMapper,
            IntWritable.class, VectorWritable.class, SequenceFileOutputFormat.class);

    Configuration solverConf = solverForUorI.getConfiguration();

    solverConf.setLong("mapred.min.split.size", dfsBlockSize);
    solverConf.setLong("mapred.max.split.size", dfsBlockSize);
    solverConf.setBoolean("mapred.map.tasks.speculative.execution", false);
    solverConf.setInt("mapred.map.tasks", LARGE_MATRIX_MAP_TASKS_NUM);
    solverConf.setLong("mapred.task.timeout", 600000 * 5);
    solverConf.setInt("mapred.job.reuse.jvm.num.tasks", -1);
    solverConf.set("mapred.child.java.opts", SMALL_MATRIX_MEMORY);

    solverConf.set(LAMBDA, String.valueOf(lambda));
    solverConf.set(ALPHA, String.valueOf(alpha));
    solverConf.setInt(NUM_FEATURES, numFeatures);
    solverConf.set(FEATURE_MATRIX, pathToUorI.toString());
    solverConf.set(FEATURE_MATRIX_TRANSPOSE, pathToTranspose.toString());
    solverConf.setInt("rowNums", rowNums);
    solverConf.setBoolean("mapred.compress.map.output", true);
    solverConf.set("mapred.map.output.compression.codec", LZO_CODEC_CLASS);
    solverForUorI.waitForCompletion(true);
}

From source file:com.skp.experiment.cf.als.hadoop.ParallelALSFactorizationJob.java

License:Apache License

private void runSolver(Path ratings, Path output, Path pathToUorI, Path pathToTranspose, int numRows,
        boolean largeMatrix) throws ClassNotFoundException, IOException, InterruptedException {
    @SuppressWarnings("rawtypes")
    Class<? extends Mapper> solverMapper = implicitFeedback ? SolveImplicitFeedbackMultithreadedMapper.class
            : SolveExplicitFeedbackMapper.class;

    Job solverForUorI = prepareJob(ratings, output, SequenceFileInputFormat.class, solverMapper,
            IntWritable.class, VectorWritable.class, SequenceFileOutputFormat.class);

    Configuration solverConf = solverForUorI.getConfiguration();

    long matrixSizeExp = (long) (8L * numRows * numFeatures * SAFE_MARGIN);
    long memoryThreshold = HadoopClusterUtil.PHYSICAL_MEMERY_LIMIT / HadoopClusterUtil.MAP_TASKS_PER_NODE;
    int numTaskPerDataNode = Math.max(1, (int) (HadoopClusterUtil.PHYSICAL_MEMERY_LIMIT / matrixSizeExp));

    if (matrixSizeExp > memoryThreshold) {
        solverConf.set("mapred.child.java.opts", "-Xmx8g");
        solverConf.set("mapred.map.child.java.opts", "-Xmx8g");
        solverConf.setLong("dfs.block.size", HadoopClusterUtil.getMaxBlockSize(getConf(), pathToTransformed()));
        solverConf.setInt("mapred.map.tasks", HadoopClusterUtil.getNumberOfTaskTrackers(getConf()));
        solverConf.setLong("mapred.min.split.size",
                HadoopClusterUtil.getMaxBlockSize(getConf(), pathToTransformed()));
        solverConf.setLong("mapred.max.split.size",
                HadoopClusterUtil.getMaxBlockSize(getConf(), pathToTransformed()));
        solverConf.set(SolveImplicitFeedbackMultithreadedMapper.LOCK_FILE, pathToHostLocks().toString());
        solverConf.setInt(SolveImplicitFeedbackMultithreadedMapper.LOCK_FILE_NUMS,
                Math.min(HadoopClusterUtil.MAP_TASKS_PER_NODE, numTaskPerDataNode));
    } else {
        solverConf.setLong("mapred.min.split.size",
                HadoopClusterUtil.getMinInputSplitSizeMax(getConf(), ratings));
        solverConf.setLong("mapred.max.split.size",
                HadoopClusterUtil.getMinInputSplitSizeMax(getConf(), ratings));
        solverConf.setInt("mapred.map.tasks",
                HadoopClusterUtil.getNumberOfTaskTrackers(getConf()) * multiplyMapTasks);
        //solverConf.setBoolean("mapred.map.tasks.speculative.execution", false);
    }
    solverConf.setLong("mapred.task.timeout", taskTimeout);
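    // setBoolean: disable speculative execution for the solver's map tasks.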
    solverConf.setBoolean("mapred.map.tasks.speculative.execution", false);

    solverConf.set(SolveImplicitFeedbackMultithreadedMapper.LAMBDA, String.valueOf(lambda));
    solverConf.set(SolveImplicitFeedbackMultithreadedMapper.ALPHA, String.valueOf(alpha));
    solverConf.setInt(SolveImplicitFeedbackMultithreadedMapper.NUM_FEATURES, numFeatures);
    solverConf.setInt(SolveImplicitFeedbackMultithreadedMapper.NUM_ROWS, numRows);
    solverConf.set(SolveImplicitFeedbackMultithreadedMapper.FEATURE_MATRIX, pathToUorI.toString());
    solverConf.set(SolveImplicitFeedbackMultithreadedMapper.FEATURE_MATRIX_TRANSPOSE,
            pathToTranspose.toString());

    solverForUorI.waitForCompletion(true);
}