List of usage examples for org.apache.hadoop.conf.Configuration.setInt

public void setInt(String name, int value)

Sets the value of the name property to an int.
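Before the project examples below, here is a minimal, self-contained sketch of the call itself; the property name "my.example.retries" and the class name SetIntExample are made up purely for illustration:

import org.apache.hadoop.conf.Configuration;

public class SetIntExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Hypothetical property name, used only to show the call pattern
        conf.setInt("my.example.retries", 3);
        // getInt reads the value back, falling back to the supplied default (1) if the property is unset
        System.out.println(conf.getInt("my.example.retries", 1)); // prints 3
    }
}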
From source file: com.lightboxtechnologies.spectrum.ExtractData.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        System.err.println("Usage: ExtractData <imageID> <friendly_name> <extents_file> <evidence file>");
        return 2;
    }

    final String imageID = args[0];
    final String friendlyName = args[1];
    final String extentsPath = args[2];
    final String image = args[3];

    Configuration conf = getConf();

    final Job job = SKJobFactory.createJobFromConf(imageID, friendlyName, "ExtractData", conf);
    job.setJarByClass(ExtractData.class);
    job.setMapperClass(ExtractDataMapper.class);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setNumReduceTasks(1);

    // job ctor copies the Configuration we pass it, get the real one
    conf = job.getConfiguration();
    conf.setLong("timestamp", System.currentTimeMillis());

    job.setInputFormatClass(RawFileInputFormat.class);
    RawFileInputFormat.addInputPath(job, new Path(image));

    job.setOutputFormatClass(HFileOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);

    conf.setInt("mapreduce.job.jvm.numtasks", -1);

    final FileSystem fs = FileSystem.get(conf);
    Path hfileDir = new Path("/texaspete/ev/tmp", UUID.randomUUID().toString());
    hfileDir = hfileDir.makeQualified(fs);
    LOG.info("Hashes will be written temporarily to " + hfileDir);

    HFileOutputFormat.setOutputPath(job, hfileDir);

    final Path extp = new Path(extentsPath);
    final URI extents = extp.toUri();
    LOG.info("extents file is " + extents);

    DistributedCache.addCacheFile(extents, conf);
    conf.set("com.lbt.extentsname", extp.getName());

    // job.getConfiguration().setBoolean("mapred.task.profile", true);
    // job.getConfiguration().setBoolean("mapreduce.task.profile", true);

    HBaseTables.summon(conf, HBaseTables.HASH_TBL_B, HBaseTables.HASH_COLFAM_B);
    HBaseTables.summon(conf, HBaseTables.ENTRIES_TBL_B, HBaseTables.ENTRIES_COLFAM_B);

    final boolean result = job.waitForCompletion(true);
    if (result) {
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
        HBaseConfiguration.addHbaseResources(conf);
        loader.setConf(conf);
        LOG.info("Loading hashes into hbase");
        chmodR(fs, hfileDir);
        loader.doBulkLoad(hfileDir, new HTable(conf, HBaseTables.HASH_TBL_B));
        // result = fs.delete(hfileDir, true);
    }
    return result ? 0 : 1;
}
From source file:com.marklogic.contentpump.Command.java
License:Apache License
static void applyBatchTxn(Configuration conf, CommandLine cmdline, int maxBatch) {
    String batchSize = cmdline.getOptionValue(BATCH_SIZE);
    int batch = MarkLogicConstants.DEFAULT_BATCH_SIZE > maxBatch ? maxBatch : MarkLogicConstants.DEFAULT_BATCH_SIZE;
    if (batchSize != null) {
        batch = Integer.decode(batchSize);
        if (batch > maxBatch) {
            LOG.warn("The setting for " + BATCH_SIZE + " is changed to " + maxBatch);
            batch = maxBatch;
        }
        conf.setInt(MarkLogicConstants.BATCH_SIZE, batch);
    }

    String txnSize = cmdline.getOptionValue(TRANSACTION_SIZE);
    if (txnSize != null) {
        int txn = Integer.decode(txnSize);
        if (txn * batch > MAX_TXN_SIZE) {
            txn = MAX_TXN_SIZE / batch;
            LOG.warn("The setting for " + TRANSACTION_SIZE + " is changed to " + txn);
        }
        conf.setInt(MarkLogicConstants.TXN_SIZE, txn);
    }
}
From source file:com.marklogic.contentpump.ContentPump.java
License:Apache License
public static int runCommand(String[] args) throws IOException {
    // get command
    String cmd = args[0];
    if (cmd.equalsIgnoreCase("help")) {
        printUsage();
        return 1;
    } else if (cmd.equalsIgnoreCase("version")) {
        logVersions();
        return 1;
    }
    Command command = Command.forName(cmd);

    // get options arguments
    String[] optionArgs = Arrays.copyOfRange(args, 1, args.length);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Command: " + command);
        StringBuilder buf = new StringBuilder();
        for (String arg : optionArgs) {
            buf.append(arg);
            buf.append(' ');
        }
        LOG.debug("Arguments: " + buf);
    }

    // parse hadoop specific options
    Configuration conf = new Configuration();
    GenericOptionsParser genericParser = new GenericOptionsParser(conf, optionArgs);
    String[] remainingArgs = genericParser.getRemainingArgs();

    // parse command specific options
    CommandlineOptions options = new CommandlineOptions();
    command.configOptions(options);
    CommandLineParser parser = new GnuParser();
    CommandLine cmdline;
    try {
        cmdline = parser.parse(options, remainingArgs);
    } catch (Exception e) {
        LOG.error("Error parsing command arguments: ");
        LOG.error(e.getMessage());
        // Print the command usage message and exit.
        command.printUsage(command, options.getPublicOptions());
        return 1; // Exit on exception here.
    }

    for (String arg : cmdline.getArgs()) {
        LOG.error("Unrecognized argument: " + arg);
        // Print the command usage message and exit.
        command.printUsage(command, options.getPublicOptions());
        return 1; // Exit on exception here.
    }

    // check running mode and hadoop conf dir configuration
    String mode = cmdline.getOptionValue(MODE);
    String hadoopConfDir = System.getenv(HADOOP_CONFDIR_ENV_NAME);
    if (cmdline.hasOption(HADOOP_CONF_DIR)) {
        hadoopConfDir = cmdline.getOptionValue(HADOOP_CONF_DIR);
    }

    boolean distributed = hadoopConfDir != null && (mode == null || mode.equals(MODE_DISTRIBUTED));
    if (MODE_DISTRIBUTED.equalsIgnoreCase(mode) && !distributed) {
        LOG.error("Cannot run in distributed mode. HADOOP_CONF_DIR is " + "not configured.");
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Running in: " + (distributed ? "distributed " : "local") + "mode");
        if (distributed) {
            LOG.debug("HADOOP_CONF_DIR is set to " + hadoopConfDir);
        }
    }
    conf.set(EXECUTION_MODE, distributed ? MODE_DISTRIBUTED : MODE_LOCAL);

    if (distributed) {
        if (!cmdline.hasOption(SPLIT_INPUT) && Command.getInputType(cmdline).equals(InputType.DELIMITED_TEXT)) {
            conf.setBoolean(ConfigConstants.CONF_SPLIT_INPUT, true);
        }
        File hdConfDir = new File(hadoopConfDir);
        try {
            checkHadoopConfDir(hdConfDir);
        } catch (IllegalArgumentException e) {
            LOG.error("Error found with Hadoop home setting", e);
            System.err.println(e.getMessage());
            return 1;
        }
        // set new class loader based on Hadoop Conf Dir
        try {
            setClassLoader(hdConfDir, conf);
        } catch (Exception e) {
            LOG.error("Error configuring class loader", e);
            System.err.println(e.getMessage());
            return 1;
        }
    } else { // running in local mode
        // Tell Hadoop that we are running in local mode. This is useful
        // when the user has Hadoop home or their Hadoop conf dir in their
        // classpath but want to run in local mode.
        conf.set(CONF_MAPREDUCE_JOBTRACKER_ADDRESS, "local");
    }

    // create job
    Job job = null;
    try {
        if (distributed) {
            // So far all jobs created by mlcp are map only,
            // so set number of reduce tasks to 0.
            conf.setInt("mapreduce.job.reduces", 0);
            // No speculative runs since speculative tasks don't get to
            // clean up sessions properly
            conf.setBoolean("mapreduce.map.speculative", false);
        } else {
            // set working directory
            conf.set(CONF_MAPREDUCE_JOB_WORKING_DIR, System.getProperty("user.dir"));
        }
        job = command.createJob(conf, cmdline);
    } catch (Exception e) {
        // Print exception message.
        e.printStackTrace();
        return 1;
    }

    LOG.info("Job name: " + job.getJobName());

    // run job
    try {
        if (distributed) {
            // submit job
            submitJob(job);
        } else {
            runJobLocally(job, cmdline, command);
        }
        return 0;
    } catch (Exception e) {
        LOG.error("Error running a ContentPump job", e);
        e.printStackTrace(System.err);
        return 1;
    }
}
From source file:com.marklogic.contentpump.MultithreadedMapper.java
License:Apache License
public static void setNumberOfThreads(Configuration conf, int threads) {
    conf.setInt(ConfigConstants.CONF_THREADS_PER_SPLIT, threads);
}
From source file:com.mellanox.r4h.MiniDFSCluster.java
License:Apache License
private void initMiniDFSCluster(Configuration conf, int numDataNodes, StorageType storageType, boolean format,
        boolean manageNameDfsDirs, boolean manageNameDfsSharedDirs, boolean enableManagedDfsDirsRedundancy,
        boolean manageDataDfsDirs, StartupOption startOpt, StartupOption dnStartOpt, String[] racks,
        String[] hosts, long[] simulatedCapacities, String clusterId, boolean waitSafeMode,
        boolean setupHostsFile, MiniDFSNNTopology nnTopology, boolean checkExitOnShutdown,
        boolean checkDataNodeAddrConfig, boolean checkDataNodeHostConfig, Configuration[] dnConfOverlays)
        throws IOException {
    ExitUtil.disableSystemExit();
    // Re-enable symlinks for tests, see HADOOP-10020 and HADOOP-10052
    FileSystem.enableSymlinks();
    synchronized (MiniDFSCluster.class) {
        instanceId = instanceCount++;
    }
    this.conf = conf;
    base_dir = new File(determineDfsBaseDir());
    data_dir = new File(base_dir, "data");
    this.waitSafeMode = waitSafeMode;
    this.checkExitOnShutdown = checkExitOnShutdown;

    int replication = conf.getInt(DFS_REPLICATION_KEY, 3);
    conf.setInt(DFS_REPLICATION_KEY, Math.min(replication, numDataNodes));
    int safemodeExtension = conf.getInt(DFS_NAMENODE_SAFEMODE_EXTENSION_TESTING_KEY, 0);
    conf.setInt(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, safemodeExtension);
    conf.setInt(DFS_NAMENODE_DECOMMISSION_INTERVAL_KEY, 3); // 3 second
    conf.setClass(NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, StaticMapping.class, DNSToSwitchMapping.class);

    // In an HA cluster, in order for the StandbyNode to perform checkpoints,
    // it needs to know the HTTP port of the Active. So, if ephemeral ports
    // are chosen, disable checkpoints for the test.
    if (!nnTopology.allHttpPortsSpecified() && nnTopology.isHA()) {
        LOG.info("MiniDFSCluster disabling checkpointing in the Standby node "
                + "since no HTTP ports have been specified.");
        conf.setBoolean(DFS_HA_STANDBY_CHECKPOINTS_KEY, false);
    }
    if (!nnTopology.allIpcPortsSpecified() && nnTopology.isHA()) {
        LOG.info("MiniDFSCluster disabling log-roll triggering in the "
                + "Standby node since no IPC ports have been specified.");
        conf.setInt(DFS_HA_LOGROLL_PERIOD_KEY, -1);
    }

    federation = nnTopology.isFederated();
    try {
        createNameNodesAndSetConf(nnTopology, manageNameDfsDirs, manageNameDfsSharedDirs,
                enableManagedDfsDirsRedundancy, format, startOpt, clusterId, conf);
    } catch (IOException ioe) {
        LOG.error("IOE creating namenodes. Permissions dump:\n" + createPermissionsDiagnosisString(data_dir));
        throw ioe;
    }
    if (format) {
        if (data_dir.exists() && !FileUtil.fullyDelete(data_dir)) {
            throw new IOException(
                    "Cannot remove data directory: " + data_dir + createPermissionsDiagnosisString(data_dir));
        }
    }

    if (startOpt == StartupOption.RECOVER) {
        return;
    }

    // Start the DataNodes
    startDataNodes(conf, numDataNodes, storageType, manageDataDfsDirs,
            dnStartOpt != null ? dnStartOpt : startOpt, racks, hosts, simulatedCapacities, setupHostsFile,
            checkDataNodeAddrConfig, checkDataNodeHostConfig, dnConfOverlays);
    waitClusterUp();
    // make sure ProxyUsers uses the latest conf
    ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
}
From source file:com.mellanox.r4h.TestFSOutputSummer.java
License:Apache License
private void doTestFSOutputSummer(String checksumType) throws Exception {
    Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
    conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, BYTES_PER_CHECKSUM);
    conf.set(DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY, checksumType);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_OF_DATANODES).build();
    fileSys = cluster.getFileSystem();
    try {
        Path file = new Path("try.dat");
        Random rand = new Random(seed);
        rand.nextBytes(expected);
        writeFile1(file);
        writeFile2(file);
        writeFile3(file);
    } finally {
        fileSys.close();
        cluster.shutdown();
    }
}
From source file:com.mellanox.r4h.TestFSOutputSummer.java
License:Apache License
@Test
public void TestDFSCheckSumType() throws Exception {
    Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
    conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, BYTES_PER_CHECKSUM);
    conf.set(DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY, "NULL");
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_OF_DATANODES).build();
    fileSys = cluster.getFileSystem();
    try {
        Path file = new Path("try.dat");
        Random rand = new Random(seed);
        rand.nextBytes(expected);
        writeFile1(file);
    } finally {
        fileSys.close();
        cluster.shutdown();
    }
}
From source file:com.mellanox.r4h.TestHFlush.java
License:Apache License
/**
 * The test uses
 * {@link #doTheJob(Configuration, String, long, short, boolean, EnumSet)}
 * to write a file with a custom block size so the writes will be
 * happening across block boundaries.
 */
@Test
public void hFlush_02() throws IOException {
    Configuration conf = new HdfsConfiguration();
    int customPerChecksumSize = 512;
    int customBlockSize = customPerChecksumSize * 3;
    // Modify default filesystem settings
    conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, customPerChecksumSize);
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, customBlockSize);

    doTheJob(conf, fName, customBlockSize, (short) 2, false, EnumSet.noneOf(SyncFlag.class));
}
From source file:com.mellanox.r4h.TestHFlush.java
License:Apache License
/**
 * The test uses
 * {@link #doTheJob(Configuration, String, long, short, boolean, EnumSet)}
 * to write a file with a custom block size so the writes will be
 * happening across block and checksum boundaries.
 */
@Test
public void hFlush_03() throws IOException {
    Configuration conf = new HdfsConfiguration();
    int customPerChecksumSize = 400;
    int customBlockSize = customPerChecksumSize * 3;
    // Modify default filesystem settings
    conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, customPerChecksumSize);
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, customBlockSize);

    doTheJob(conf, fName, customBlockSize, (short) 2, false, EnumSet.noneOf(SyncFlag.class));
}
From source file:com.mellanox.r4h.TestHFlush.java
License:Apache License
/**
 * The test calls
 * {@link #doTheJob(Configuration, String, long, short, boolean, EnumSet)}
 * while requiring the semantic of {@link SyncFlag#UPDATE_LENGTH}.
 * Similar to {@link #hFlush_02()}, it writes a file with a custom block
 * size so the writes will be happening across block boundaries.
 */
@Test
public void hSyncUpdateLength_02() throws IOException {
    Configuration conf = new HdfsConfiguration();
    int customPerChecksumSize = 512;
    int customBlockSize = customPerChecksumSize * 3;
    // Modify default filesystem settings
    conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, customPerChecksumSize);
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, customBlockSize);

    doTheJob(conf, fName, customBlockSize, (short) 2, true, EnumSet.of(SyncFlag.UPDATE_LENGTH));
}