List of usage examples for org.apache.hadoop.conf.Configuration setBoolean

public void setBoolean(String name, boolean value)

Sets the value of the name property to a boolean.
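Before the project examples below, here is a minimal, self-contained sketch of the call in isolation. The property name "example.feature.enabled" is made up for illustration; getBoolean is shown only to demonstrate the round trip, with its second argument acting as the default returned when the property is unset.

import org.apache.hadoop.conf.Configuration;

public class SetBooleanExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Store a boolean flag under a (hypothetical) property name.
        conf.setBoolean("example.feature.enabled", true);

        // Read it back; the second argument is the default used when the property is not set.
        boolean enabled = conf.getBoolean("example.feature.enabled", false);
        System.out.println("example.feature.enabled = " + enabled);
    }
}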
From source file:com.placeiq.piqconnect.BlocksBuilder.java
License:Apache License
protected Job configStage1() throws Exception {
    FileSystem fs = FileSystem.get(getConf());
    fs.delete(pathOutput, true); // useful ?

    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);
    conf.setBoolean(Constants.PROP_IS_VECTOR, isVector);
    conf.set("mapred.output.compression.type", "BLOCK"); // useful ?

    Job job = new Job(conf, "data-piqid.piqconnect.BlocksBuilder");
    job.setJarByClass(BlocksBuilder.class);
    job.setMapperClass(MapStage1.class);
    job.setReducerClass(RedStage1.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setMapOutputKeyClass(BlockIndexWritable.class);
    job.setMapOutputValueClass(LightBlockWritable.class);
    job.setOutputKeyClass(BlockIndexWritable.class);
    job.setOutputValueClass(BlockWritable.class);

    FileInputFormat.setInputPaths(job, pathEdges);
    SequenceFileOutputFormat.setOutputPath(job, pathOutput);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    Runner.setCompression(job);

    return job;
}
From source file:com.pocketx.gravity.recommender.cf.similarity.job.RowSimilarityJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption("numberOfColumns", "r", "Number of columns in the input matrix", false);
    addOption("similarityClassname", "s",
            "Name of distributed similarity class to instantiate, alternatively use "
                    + "one of the predefined similarities (" + VectorSimilarityMeasures.list() + ')');
    addOption("maxSimilaritiesPerRow", "m",
            "Number of maximum similarities per row (default: " + DEFAULT_MAX_SIMILARITIES_PER_ROW + ')',
            String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ROW));
    addOption("excludeSelfSimilarity", "ess", "compute similarity of rows to themselves?",
            String.valueOf(false));
    addOption("threshold", "tr", "discard row pairs with a similarity value below this", false);
    addOption(DefaultOptionCreator.overwriteOption().create());

    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    int numberOfColumns;
    if (hasOption("numberOfColumns")) {
        // Number of columns explicitly specified via CLI
        numberOfColumns = Integer.parseInt(getOption("numberOfColumns"));
    } else {
        // else get the number of columns by determining the cardinality of a vector in the input matrix
        numberOfColumns = getDimensions(getInputPath());
    }

    String similarityClassnameArg = getOption("similarityClassname");
    String similarityClassname;
    try {
        similarityClassname = VectorSimilarityMeasures.valueOf(similarityClassnameArg).getClassname();
    } catch (IllegalArgumentException iae) {
        similarityClassname = similarityClassnameArg;
    }

    // Clear the output and temp paths if the overwrite option has been set
    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        // Clear the temp path
        HadoopUtil.delete(getConf(), getTempPath());
        // Clear the output path
        HadoopUtil.delete(getConf(), getOutputPath());
    }

    int maxSimilaritiesPerRow = Integer.parseInt(getOption("maxSimilaritiesPerRow"));
    boolean excludeSelfSimilarity = Boolean.parseBoolean(getOption("excludeSelfSimilarity"));
    double threshold = hasOption("threshold") ? Double.parseDouble(getOption("threshold")) : NO_THRESHOLD;

    Path weightsPath = getTempPath("weights");
    Path normsPath = getTempPath("norms.bin");
    Path numNonZeroEntriesPath = getTempPath("numNonZeroEntries.bin");
    Path maxValuesPath = getTempPath("maxValues.bin");
    Path pairwiseSimilarityPath = getTempPath("pairwiseSimilarity");

    AtomicInteger currentPhase = new AtomicInteger();

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job normsAndTranspose = prepareJob(getInputPath(), weightsPath, VectorNormMapper.class,
                IntWritable.class, VectorWritable.class, MergeVectorsReducer.class, IntWritable.class,
                VectorWritable.class);
        normsAndTranspose.setCombinerClass(MergeVectorsCombiner.class);
        Configuration normsAndTransposeConf = normsAndTranspose.getConfiguration();
        normsAndTransposeConf.set(THRESHOLD, String.valueOf(threshold));
        normsAndTransposeConf.set(NORMS_PATH, normsPath.toString());
        normsAndTransposeConf.set(NUM_NON_ZERO_ENTRIES_PATH, numNonZeroEntriesPath.toString());
        normsAndTransposeConf.set(MAXVALUES_PATH, maxValuesPath.toString());
        normsAndTransposeConf.set(SIMILARITY_CLASSNAME, similarityClassname);
        boolean succeeded = normsAndTranspose.waitForCompletion(true);
        if (!succeeded) {
            return -1;
        }
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job pairwiseSimilarity = prepareJob(weightsPath, pairwiseSimilarityPath, CooccurrencesMapper.class,
                IntWritable.class, VectorWritable.class, SimilarityReducer.class, IntWritable.class,
                VectorWritable.class);
        pairwiseSimilarity.setCombinerClass(VectorSumReducer.class);
        Configuration pairwiseConf = pairwiseSimilarity.getConfiguration();
        pairwiseConf.set(THRESHOLD, String.valueOf(threshold));
        pairwiseConf.set(NORMS_PATH, normsPath.toString());
        pairwiseConf.set(NUM_NON_ZERO_ENTRIES_PATH, numNonZeroEntriesPath.toString());
        pairwiseConf.set(MAXVALUES_PATH, maxValuesPath.toString());
        pairwiseConf.set(SIMILARITY_CLASSNAME, similarityClassname);
        pairwiseConf.setInt(NUMBER_OF_COLUMNS, numberOfColumns);
        pairwiseConf.setBoolean(EXCLUDE_SELF_SIMILARITY, excludeSelfSimilarity);
        boolean succeeded = pairwiseSimilarity.waitForCompletion(true);
        if (!succeeded) {
            return -1;
        }
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job asMatrix = prepareJob(pairwiseSimilarityPath, getOutputPath(), UnsymmetrifyMapper.class,
                IntWritable.class, VectorWritable.class, MergeToTopKSimilaritiesReducer.class,
                IntWritable.class, VectorWritable.class);
        asMatrix.setCombinerClass(MergeToTopKSimilaritiesReducer.class);
        asMatrix.getConfiguration().setInt(MAX_SIMILARITIES_PER_ROW, maxSimilaritiesPerRow);
        boolean succeeded = asMatrix.waitForCompletion(true);
        if (!succeeded) {
            return -1;
        }
    }

    return 0;
}
From source file:com.qubole.rubix.core.TestCachingInputStream.java
License:Apache License
@BeforeMethod
public void setup() throws IOException, InterruptedException {
    final Configuration conf = new Configuration();
    conf.setBoolean(CacheConfig.DATA_CACHE_STRICT_MODE, true);
    conf.setInt(CacheConfig.dataCacheBookkeeperPortConf, 3456);
    Thread thread = new Thread() {
        public void run() {
            BookKeeperServer.startServer(conf);
        }
    };
    thread.start();

    DataGen.populateFile(backendFileName);

    while (!BookKeeperServer.isServerUp()) {
        Thread.sleep(200);
        log.info("Waiting for BookKeeper Server to come up");
    }

    createCachingStream(conf);
}
From source file:com.qubole.rubix.core.TestCachingInputStream.java
License:Apache License
public void createCachingStream(Configuration conf) throws InterruptedException, IOException {
    conf.setBoolean(CacheConfig.DATA_CACHE_STRICT_MODE, true);
    conf.setInt(CacheConfig.dataCacheBookkeeperPortConf, 3456);

    File file = new File(backendFileName);

    LocalFSInputStream localFSInputStream = new LocalFSInputStream(backendFileName);
    FSDataInputStream fsDataInputStream = new FSDataInputStream(localFSInputStream);
    conf.setInt(CacheConfig.blockSizeConf, blockSize);
    log.info("All set to test");

    // This should be after server comes up else client could not be created
    inputStream = new CachingInputStream(fsDataInputStream, conf, backendPath, file.length(),
            file.lastModified(), new CachingFileSystemStats(), 64 * 1024 * 1024,
            ClusterType.TEST_CLUSTER_MANAGER);
}
From source file:com.rim.logdriver.sawmill.Sawmill.java
License:Apache License
public void run(String[] args) {
    if (args.length < 1) {
        System.out.println("Usage: " + this.getClass().getSimpleName() + " <config.properties>");
        System.exit(1);
    }

    LOG.info("Starting {}", Sawmill.class.getSimpleName());

    // First arg is the config
    String configFile = args[0];

    // Load configuration.
    Properties conf = new Properties();
    try {
        conf.load(new FileInputStream(configFile));
    } catch (FileNotFoundException e) {
        LOG.error("Config file not found.", e);
        System.exit(1);
    } catch (Throwable t) {
        LOG.error("Error reading config file.", t);
        System.exit(1);
    }

    // Parse the configuration.

    // Load in any Hadoop config files.
    Configuration hConf = new Configuration();
    {
        String[] hadoopConfs = Configs.hadoopConfigPaths.getArray(conf);
        for (String confPath : hadoopConfs) {
            hConf.addResource(new Path(confPath));
        }
        // Also, don't shut down my FileSystem automatically!!!
        hConf.setBoolean("fs.automatic.close", false);
        for (Entry<Object, Object> e : System.getProperties().entrySet()) {
            if (e.getValue() instanceof Integer) {
                hConf.setInt(e.getKey().toString(), (Integer) e.getValue());
            } else if (e.getValue() instanceof Long) {
                hConf.setLong(e.getKey().toString(), (Long) e.getValue());
            } else {
                hConf.set(e.getKey().toString(), e.getValue().toString());
            }
        }
    }

    // Ensure that UserGroupInformation is set up, and knows if security is enabled.
    UserGroupInformation.setConfiguration(hConf);

    // Kerberos credentials. If these are not present, then it just won't try to authenticate.
    String kerbConfPrincipal = Configs.kerberosPrincipal.get(conf);
    String kerbKeytab = Configs.kerberosKeytab.get(conf);
    Authenticator.getInstance().setKerbConfPrincipal(kerbConfPrincipal);
    Authenticator.getInstance().setKerbKeytab(kerbKeytab);

    // Check out the number of threads for workers, and creater the threadpools
    // for both workers and stats updates.
    int threadCount = Configs.threadpoolSize.getInteger(conf);
    final ScheduledExecutorService executor = Executors.newScheduledThreadPool(threadCount);

    // Get the MBean server
    MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();

    // Set up the Mina Exception Monitor
    ExceptionMonitor.setInstance(new ExceptionLoggerExceptionMonitor());

    // For each port->output mapping, create a path (listener, queue, worker).
    // List<DataPath> paths = new ArrayList<DataPath>();
    final List<IoAcceptor> acceptors = new ArrayList<IoAcceptor>();
    final List<Writer> writers = new ArrayList<Writer>();
    {
        String[] pathStrings = Configs.paths.getArray(conf);
        for (String p : pathStrings) {
            Properties pathConf = Util.subProperties(conf, "path." + p);

            String name = Configs.name.get(pathConf);
            if (name == null) {
                LOG.info("Path has no name. Using {}", p);
                name = p;
            }
            LOG.info("[{}] Configuring path {}", name, name);

            // Check the properties for this specific instance
            Integer maxLineLength = Configs.tcpMaxLineLength.getInteger(pathConf);
            if (maxLineLength == null) {
                maxLineLength = Configs.defaultTcpMaxLineLength.getInteger(conf);
            }
            LOG.info("[{}] Maximum line length is {}", name, maxLineLength);

            InetAddress bindAddress = null;
            try {
                String address = Configs.bindAddress.get(pathConf);
                bindAddress = InetAddress.getByName(address);
            } catch (UnknownHostException e) {
                LOG.error("[{}] Error getting bindAddress from string {}",
                        new Object[] { name, pathConf.getProperty("bindAddress") }, e);
            }

            Integer port = Configs.port.getInteger(pathConf);
            if (port == null) {
                LOG.error("[{}] Port not set. Skipping this path.", name);
                continue;
            }

            int queueLength = Configs.queueCapacity.getInteger(pathConf);

            // Set up the actual processing chain
            IoAcceptor acceptor = new NioSocketAcceptor();
            SocketSessionConfig sessionConfig = (SocketSessionConfig) acceptor.getSessionConfig();
            sessionConfig.setReuseAddress(true);
            acceptors.add(acceptor);

            String charsetName = Configs.charset.getString(pathConf);
            Charset charset = null;
            try {
                charset = Charset.forName(charsetName);
            } catch (UnsupportedCharsetException e) {
                LOG.error("[{}] Charset '{}' is not supported. Defaulting to UTF-8.", name, charsetName);
                charset = Charset.forName("UTF-8");
            }
            LOG.info("[{}] Using character set {}", name, charset.displayName());
            TextLineCodecFactory textLineCodecFactory = new TextLineCodecFactory(charset, LineDelimiter.UNIX,
                    LineDelimiter.AUTO);
            textLineCodecFactory.setDecoderMaxLineLength(maxLineLength);
            acceptor.getFilterChain().addLast("textLineCodec", new ProtocolCodecFilter(textLineCodecFactory));

            int numBuckets = Configs.outputBuckets.getInteger(pathConf);
            if (numBuckets > 1) {
                // Set up mulitple writers for one MultiEnqueueHandler
                @SuppressWarnings("unchecked")
                BlockingQueue<String>[] queues = new BlockingQueue[numBuckets];

                for (int i = 0; i < numBuckets; i++) {
                    BlockingQueue<String> queue = new ArrayBlockingQueue<String>(queueLength);
                    queues[i] = queue;

                    // Set up the processor on the other end.
                    Writer writer = new Writer();
                    writer.setName(name);
                    writer.setConfig(pathConf);
                    writer.setHadoopConf(hConf);
                    writer.setQueue(queue);
                    writer.init();

                    // Set up MBean for the Writer
                    {
                        ObjectName mbeanName = null;
                        try {
                            mbeanName = new ObjectName(Writer.class.getPackage().getName() + ":type="
                                    + Writer.class.getSimpleName() + " [" + i + "]" + ",name=" + name);
                        } catch (MalformedObjectNameException e) {
                            LOG.error("[{}] Error creating MBean name.", name, e);
                        } catch (NullPointerException e) {
                            LOG.error("[{}] Error creating MBean name.", name, e);
                        }
                        try {
                            mbs.registerMBean(writer, mbeanName);
                        } catch (InstanceAlreadyExistsException e) {
                            LOG.error("[{}] Error registering MBean name.", name, e);
                        } catch (MBeanRegistrationException e) {
                            LOG.error("[{}] Error registering MBean name.", name, e);
                        } catch (NotCompliantMBeanException e) {
                            LOG.error("[{}] Error registering MBean name.", name, e);
                        }
                    }

                    executor.scheduleWithFixedDelay(writer, 0, 100, TimeUnit.MILLISECONDS);
                    writers.add(writer);
                }

                MultiEnqueueHandler handler = new MultiEnqueueHandler(queues);
                acceptor.setHandler(handler);

                // Set up MBean for the MultiEnqueueHandler
                {
                    ObjectName mbeanName = null;
                    try {
                        mbeanName = new ObjectName(MultiEnqueueHandler.class.getPackage().getName() + ":type="
                                + MultiEnqueueHandler.class.getSimpleName() + ",name=" + name);
                    } catch (MalformedObjectNameException e) {
                        LOG.error("[{}] Error creating MBean name.", name, e);
                    } catch (NullPointerException e) {
                        LOG.error("[{}] Error creating MBean name.", name, e);
                    }
                    try {
                        mbs.registerMBean(handler, mbeanName);
                    } catch (InstanceAlreadyExistsException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    } catch (MBeanRegistrationException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    } catch (NotCompliantMBeanException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    }
                }
            } else {
                BlockingQueue<String> queue = new ArrayBlockingQueue<String>(queueLength);

                // Set up the processor on the other end.
                Writer writer = new Writer();
                writer.setName(name);
                writer.setConfig(pathConf);
                writer.setHadoopConf(hConf);
                writer.setQueue(queue);
                writer.init();

                // Set up MBean for the Writer
                {
                    ObjectName mbeanName = null;
                    try {
                        mbeanName = new ObjectName(Writer.class.getPackage().getName() + ":type="
                                + Writer.class.getSimpleName() + ",name=" + name);
                    } catch (MalformedObjectNameException e) {
                        LOG.error("[{}] Error creating MBean name.", name, e);
                    } catch (NullPointerException e) {
                        LOG.error("[{}] Error creating MBean name.", name, e);
                    }
                    try {
                        mbs.registerMBean(writer, mbeanName);
                    } catch (InstanceAlreadyExistsException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    } catch (MBeanRegistrationException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    } catch (NotCompliantMBeanException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    }
                }

                executor.scheduleWithFixedDelay(writer, 0, 100, TimeUnit.MILLISECONDS);
                writers.add(writer);

                EnqueueHandler handler = new EnqueueHandler(queue);
                acceptor.setHandler(handler);

                // Set up MBean for the EnqueueHandler
                {
                    ObjectName mbeanName = null;
                    try {
                        mbeanName = new ObjectName(EnqueueHandler.class.getPackage().getName() + ":type="
                                + EnqueueHandler.class.getSimpleName() + ",name=" + name);
                    } catch (MalformedObjectNameException e) {
                        LOG.error("[{}] Error creating MBean name.", name, e);
                    } catch (NullPointerException e) {
                        LOG.error("[{}] Error creating MBean name.", name, e);
                    }
                    try {
                        mbs.registerMBean(handler, mbeanName);
                    } catch (InstanceAlreadyExistsException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    } catch (MBeanRegistrationException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    } catch (NotCompliantMBeanException e) {
                        LOG.error("[{}] Error registering MBean name.", name, e);
                    }
                }
            }

            acceptor.getSessionConfig().setReadBufferSize(Configs.tcpReadBufferSize.getInteger(pathConf));
            acceptor.getSessionConfig().setIdleTime(IdleStatus.BOTH_IDLE, 5);

            while (true) {
                try {
                    acceptor.bind(new InetSocketAddress(bindAddress, port));
                } catch (IOException e) {
                    LOG.error("Error binding to {}:{}. Retrying...", bindAddress, port);
                    try {
                        Thread.sleep(2000);
                    } catch (InterruptedException e1) {
                        // nothing
                    }
                    continue;
                }
                break;
            }
        }
    }

    // Register a shutdown hook..
    Runtime.getRuntime().addShutdownHook(new Thread() {
        public void run() {
            LOG.info("Shutting down");

            LOG.info("Unbinding and disposing of all IoAcceptors");
            for (IoAcceptor acceptor : acceptors) {
                acceptor.unbind();
                acceptor.dispose(true);
            }

            LOG.info("Shutting down worker threadpools. This could take a little while.");
            executor.shutdown();
            try {
                executor.awaitTermination(10, TimeUnit.MINUTES);
            } catch (InterruptedException e) {
                LOG.error("Interrupted waiting for writer threadpool termination.", e);
            }
            if (!executor.isTerminated()) {
                LOG.error("Threadpool did not terminate cleanly.");
            }

            LOG.info("Cleaning out any remaining messages from the queues.");
            List<Thread> threads = new ArrayList<Thread>();
            for (final Writer writer : writers) {
                Runnable r = new Runnable() {
                    @Override
                    public void run() {
                        try {
                            writer.runAndClose();
                        } catch (Throwable t) {
                            LOG.error("Error shutting down writer [{}]", writer.getName(), t);
                        }
                    }
                };
                Thread t = new Thread(r);
                t.setDaemon(false);
                t.start();
                threads.add(t);
            }
            for (Thread t : threads) {
                try {
                    t.join();
                } catch (InterruptedException e) {
                    LOG.error("Interrupted waiting for thread to finish.");
                }
            }

            LOG.info("Closing filesystems.");
            try {
                FileSystem.closeAll();
            } catch (Throwable t) {
                LOG.error("Error closing filesystems.", t);
            }

            LOG.info("Finished shutting down cleanly.");
        }
    });
}
From source file:com.savy3.nonequijoin.MapOutputSampler.java
License:Apache License
/**
 * Driver for InputSampler MapReduce Job
 */
public static void runMap(Job job, Path sampleInputPath)
        throws IOException, IllegalStateException, ClassNotFoundException, InterruptedException {
    LOG.info("Running a MapReduce Job on Sample Input File" + sampleInputPath.toString());

    Configuration conf = new Configuration();
    conf.setBoolean("mapreduce.job.ubertask.enable", true);
    conf.set("numSamples", "" + (job.getNumReduceTasks() - 1));
    Job sampleJob = new Job(conf);
    sampleJob.setMapperClass(job.getMapperClass());
    sampleJob.setReducerClass(SampleKeyReducer.class);
    sampleJob.setJarByClass(job.getMapperClass());
    sampleJob.setMapOutputKeyClass(job.getMapOutputKeyClass());
    sampleJob.setMapOutputValueClass(job.getMapOutputValueClass());
    sampleJob.setOutputKeyClass(job.getMapOutputKeyClass());
    sampleJob.setOutputValueClass(NullWritable.class);
    sampleJob.setInputFormatClass(SequenceFileInputFormat.class);
    sampleJob.setOutputFormatClass(SequenceFileOutputFormat.class);

    SequenceFileInputFormat.addInputPath(sampleJob, sampleInputPath);
    FileSystem fs = FileSystem.get(conf);

    Path out = new Path(sampleInputPath.getParent(), "mapOut");
    fs.delete(out, true);

    SequenceFileOutputFormat.setOutputPath(sampleJob, out);

    sampleJob.waitForCompletion(true);

    LOG.info("Sample MapReduce Job Output File" + out.toString());

    Path partFile = new Path(out, "part-r-00000");
    Path tmpFile = new Path("/_tmp");
    fs.delete(tmpFile, true);
    fs.rename(partFile, tmpFile);
    fs.delete(sampleInputPath.getParent(), true);
    fs.rename(new Path("/_tmp"), sampleInputPath.getParent());

    LOG.info("Sample partitioning file cpied to location " + sampleInputPath.getParent().toString());
}
From source file:com.scaleoutsoftware.soss.hserver.GridOutputFormat.java
License:Apache License
/**
 * Sets the {@link NamedMap} to direct output to.
 *
 * @param job job to modify
 * @param map named map to be used for output
 */
public static void setNamedMap(Job job, NamedMap map) {
    Configuration configuration = job.getConfiguration();
    configuration.setBoolean(outputIsNamedMapProperty, true);
    configuration.setStrings(outputNamedMapProperty, map.getMapName());

    CustomSerializer keySerializer = map.getKeySerializer();
    CustomSerializer valueSerializer = map.getValueSerializer();

    SerializationMode serializationMode = map.getSerializationMode();
    AvailabilityMode availabilityMode = map.getAvailabilityMode();

    configuration.setInt(SERIALIZATION_MODE, serializationMode.ordinal());
    configuration.setInt(AVAILABILITY_MODE, availabilityMode.ordinal());

    configuration.setClass(outputNamedMapKeySerializerProperty, keySerializer.getClass(), Object.class);
    configuration.setClass(outputNamedMapValueSerializerProperty, valueSerializer.getClass(), Object.class);

    if (keySerializer.getObjectClass() != null) {
        configuration.setClass(outputNamedMapKeyProperty, keySerializer.getObjectClass(), Object.class);
    }

    if (valueSerializer.getObjectClass() != null) {
        configuration.setClass(outputNamedMapValueProperty, valueSerializer.getObjectClass(), Object.class);
    }
}
From source file:com.skp.experiment.cf.als.hadoop.DistributedParallelALSFactorizationJob.java
License:Apache License
private void runSolver(Path ratings, Path output, Path pathToUorI, Path pathToTranspose, int rowNums)
        throws ClassNotFoundException, IOException, InterruptedException {
    @SuppressWarnings("rawtypes")
    Class<? extends Mapper> solverMapper = null;
    if (implicitFeedback) {
        solverMapper = SolveImplicitFeedbackMapper.class;
    } else {
        solverMapper = SolveExplicitFeedbackMapper.class;
    }

    Job solverForUorI = prepareJob(ratings, output, SequenceFileInputFormat.class, solverMapper,
            IntWritable.class, VectorWritable.class, SequenceFileOutputFormat.class);
    Configuration solverConf = solverForUorI.getConfiguration();
    solverConf.set(LAMBDA, String.valueOf(lambda));
    solverConf.set(ALPHA, String.valueOf(alpha));
    solverConf.setInt(NUM_FEATURES, numFeatures);
    solverConf.set(FEATURE_MATRIX, pathToUorI.toString());
    solverConf.set(FEATURE_MATRIX_TRANSPOSE, pathToTranspose.toString());
    solverConf.setInt("rowNums", rowNums);

    solverConf.set("mapred.child.java.opts", SMALL_MATRIX_MEMORY);
    solverConf.setBoolean("mapred.map.tasks.speculative.execution", false);
    solverConf.setInt("mapred.job.reuse.jvm.num.tasks", -1);
    solverConf.setBoolean("mapred.compress.map.output", true);
    solverConf.set("mapred.map.output.compression.codec", LZO_CODEC_CLASS);

    solverForUorI.waitForCompletion(true);
}
From source file:com.skp.experiment.cf.als.hadoop.DistributedParallelALSFactorizationJob.java
License:Apache License
private void runDistributedImplicitSolver(Path ratings, Path output, Path pathToUorI, Path pathToTranspose,
        int rowNums) throws IOException, InterruptedException, ClassNotFoundException {
    @SuppressWarnings("rawtypes")
    Class<? extends Mapper> solverMapper = DistributedSolveImplicitFeedbackMapper.class;

    Job solverForUorI = prepareJob(ratings, output, SequenceFileInputFormat.class, solverMapper,
            IntWritable.class, VectorWritable.class, SequenceFileOutputFormat.class);
    Configuration solverConf = solverForUorI.getConfiguration();
    solverConf.setLong("mapred.min.split.size", dfsBlockSize);
    solverConf.setLong("mapred.max.split.size", dfsBlockSize);
    solverConf.setBoolean("mapred.map.tasks.speculative.execution", false);
    solverConf.setInt("mapred.map.tasks", LARGE_MATRIX_MAP_TASKS_NUM);
    solverConf.setLong("mapred.task.timeout", 600000 * 5);
    solverConf.setInt("mapred.job.reuse.jvm.num.tasks", -1);
    solverConf.set("mapred.child.java.opts", SMALL_MATRIX_MEMORY);

    solverConf.set(LAMBDA, String.valueOf(lambda));
    solverConf.set(ALPHA, String.valueOf(alpha));
    solverConf.setInt(NUM_FEATURES, numFeatures);
    solverConf.set(FEATURE_MATRIX, pathToUorI.toString());
    solverConf.set(FEATURE_MATRIX_TRANSPOSE, pathToTranspose.toString());
    solverConf.setInt("rowNums", rowNums);
    solverConf.setBoolean("mapred.compress.map.output", true);
    solverConf.set("mapred.map.output.compression.codec", LZO_CODEC_CLASS);

    solverForUorI.waitForCompletion(true);
}
From source file:com.skp.experiment.cf.als.hadoop.ParallelALSFactorizationJob.java
License:Apache License
private void runSolver(Path ratings, Path output, Path pathToUorI, Path pathToTranspose, int numRows,
        boolean largeMatrix) throws ClassNotFoundException, IOException, InterruptedException {
    @SuppressWarnings("rawtypes")
    Class<? extends Mapper> solverMapper = implicitFeedback ? SolveImplicitFeedbackMultithreadedMapper.class
            : SolveExplicitFeedbackMapper.class;

    Job solverForUorI = prepareJob(ratings, output, SequenceFileInputFormat.class, solverMapper,
            IntWritable.class, VectorWritable.class, SequenceFileOutputFormat.class);
    Configuration solverConf = solverForUorI.getConfiguration();
    long matrixSizeExp = (long) (8L * numRows * numFeatures * SAFE_MARGIN);
    long memoryThreshold = HadoopClusterUtil.PHYSICAL_MEMERY_LIMIT / HadoopClusterUtil.MAP_TASKS_PER_NODE;
    int numTaskPerDataNode = Math.max(1, (int) (HadoopClusterUtil.PHYSICAL_MEMERY_LIMIT / matrixSizeExp));

    if (matrixSizeExp > memoryThreshold) {
        solverConf.set("mapred.child.java.opts", "-Xmx8g");
        solverConf.set("mapred.map.child.java.opts", "-Xmx8g");
        solverConf.setLong("dfs.block.size", HadoopClusterUtil.getMaxBlockSize(getConf(), pathToTransformed()));
        solverConf.setInt("mapred.map.tasks", HadoopClusterUtil.getNumberOfTaskTrackers(getConf()));
        solverConf.setLong("mapred.min.split.size",
                HadoopClusterUtil.getMaxBlockSize(getConf(), pathToTransformed()));
        solverConf.setLong("mapred.max.split.size",
                HadoopClusterUtil.getMaxBlockSize(getConf(), pathToTransformed()));
        solverConf.set(SolveImplicitFeedbackMultithreadedMapper.LOCK_FILE, pathToHostLocks().toString());
        solverConf.setInt(SolveImplicitFeedbackMultithreadedMapper.LOCK_FILE_NUMS,
                Math.min(HadoopClusterUtil.MAP_TASKS_PER_NODE, numTaskPerDataNode));
    } else {
        solverConf.setLong("mapred.min.split.size",
                HadoopClusterUtil.getMinInputSplitSizeMax(getConf(), ratings));
        solverConf.setLong("mapred.max.split.size",
                HadoopClusterUtil.getMinInputSplitSizeMax(getConf(), ratings));
        solverConf.setInt("mapred.map.tasks",
                HadoopClusterUtil.getNumberOfTaskTrackers(getConf()) * multiplyMapTasks);
        //solverConf.setBoolean("mapred.map.tasks.speculative.execution", false);
    }
    solverConf.setLong("mapred.task.timeout", taskTimeout);
    solverConf.setBoolean("mapred.map.tasks.speculative.execution", false);

    solverConf.set(SolveImplicitFeedbackMultithreadedMapper.LAMBDA, String.valueOf(lambda));
    solverConf.set(SolveImplicitFeedbackMultithreadedMapper.ALPHA, String.valueOf(alpha));
    solverConf.setInt(SolveImplicitFeedbackMultithreadedMapper.NUM_FEATURES, numFeatures);
    solverConf.setInt(SolveImplicitFeedbackMultithreadedMapper.NUM_ROWS, numRows);
    solverConf.set(SolveImplicitFeedbackMultithreadedMapper.FEATURE_MATRIX, pathToUorI.toString());
    solverConf.set(SolveImplicitFeedbackMultithreadedMapper.FEATURE_MATRIX_TRANSPOSE,
            pathToTranspose.toString());

    solverForUorI.waitForCompletion(true);
}