List of usage examples for org.apache.hadoop.conf Configuration setLong
public void setLong(String name, long value)
Set the value of the name property to a long.
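As a quick orientation before the project examples, the following minimal sketch (not drawn from any of the source files below; the property name example.max.bytes is invented for illustration) shows a setLong call paired with the matching getLong read-back:

import org.apache.hadoop.conf.Configuration;

public class SetLongExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Store a long-valued property; "example.max.bytes" is an illustrative name only.
        conf.setLong("example.max.bytes", 64L * 1024 * 1024);
        // Read it back, supplying a default in case the property is absent.
        long maxBytes = conf.getLong("example.max.bytes", 0L);
        System.out.println("example.max.bytes = " + maxBytes);
    }
}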
From source file:org.apache.apex.examples.parser.regexparser.RegexParserApplicationTest.java
License:Apache License
@Test
public void testApplication() throws IOException, Exception {
    try {
        LocalMode lma = LocalMode.newInstance();
        Configuration conf = new Configuration(false);
        conf.addResource(this.getClass().getResourceAsStream("/properties-regexParserApplication.xml"));
        conf.setLong("dt.application.RegexParser.operator.logGenerator.prop.tupleRate", 10);
        final String dataFolderPath = conf.get("dt.application.RegexParser.operator.*.prop.filePath");
        final String dataFileName = conf
                .get("dt.application.RegexParser.operator.regexWriter.prop.outputFileName");
        FileUtils.deleteDirectory(new File(dataFolderPath));
        lma.prepareDAG(new RegexParserApplication(), conf);
        LocalMode.Controller lc = lma.getController();
        lc.setHeartbeatMonitoringEnabled(false);
        ((StramLocalCluster) lc).setExitCondition(new Callable<Boolean>() {
            @Override
            public Boolean call() throws Exception {
                if (new File(dataFolderPath).exists()) {
                    Collection<File> files = FileUtils.listFiles(new File(dataFolderPath),
                            new WildcardFileFilter(dataFileName + "*"), null);
                    if (files.size() >= 1) {
                        File parsedFile = files.iterator().next();
                        String fileData = FileUtils.readFileToString(parsedFile);
                        String[] regexData = fileData.split("\n");
                        return regexData.length == 10;
                    }
                }
                return false;
            }
        });
        lc.run(30 * 1000); // runs for 30 seconds and quits
        Collection<File> files = FileUtils.listFiles(new File(dataFolderPath),
                new WildcardFileFilter(dataFileName + "*"), null);
        File parsedFile = files.iterator().next();
        String fileData = FileUtils.readFileToString(parsedFile);
        String[] logData = fileData.split("\n");
        for (String logLine : logData) {
            Assert.assertTrue(logLine.contains("id=" + 101));
            Assert.assertTrue(logLine.contains("signInId=" + "'11111@psop.com'"));
            Assert.assertTrue(logLine.contains("serviceId=" + "'IP1234-NPB12345_00'"));
            Assert.assertTrue(logLine.contains("accountId=" + "'11111'"));
            Assert.assertTrue(logLine.contains("platform=" + "'pik'"));
        }
    } catch (ConstraintViolationException e) {
        Assert.fail("constraint violations: " + e.getConstraintViolations());
    }
}
From source file:org.apache.crunch.util.PartitionUtilsTest.java
License:Apache License
@Test
public void testBytesPerTask() throws Exception {
    Configuration conf = new Configuration();
    conf.setLong(PartitionUtils.BYTES_PER_REDUCE_TASK, 500L * 1000L * 1000L);
    when(pcollection.getSize()).thenReturn(7 * 1000L * 1000L * 1000L);
    when(pcollection.getPipeline()).thenReturn(pipeline);
    when(pipeline.getConfiguration()).thenReturn(conf);
    assertEquals(15, PartitionUtils.getRecommendedPartitions(pcollection));
}
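As a side note on the arithmetic: the mocked collection is 7 * 1000^3 bytes and BYTES_PER_REDUCE_TASK is 500 * 1000^2 bytes, so straight division gives 14. The expected value of 15 is consistent with getRecommendedPartitions computing roughly 1 + size / bytesPerTask, i.e. always allowing at least one partition; that rounding behaviour is an inference from the test, not something shown in this snippet.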
From source file:org.apache.giraph.conf.LongConfOption.java
License:Apache License
/**
 * Set value if it's not already present
 *
 * @param conf Configuration
 * @param value to set
 */
public void setIfUnset(Configuration conf, long value) {
    if (!contains(conf)) {
        conf.setLong(getKey(), value);
    }
}
From source file:org.apache.giraph.hive.jython.HiveJythonUtils.java
License:Apache License
/**
 * Set arbitrary option of unknown type in Configuration
 *
 * @param conf Configuration
 * @param key String key
 * @param value Object to set
 */
private static void setOption(Configuration conf, String key, Object value) {
    if (value instanceof Boolean) {
        conf.setBoolean(key, (Boolean) value);
    } else if (value instanceof Byte || value instanceof Short || value instanceof Integer) {
        conf.setInt(key, ((Number) value).intValue());
    } else if (value instanceof Long) {
        conf.setLong(key, (Long) value);
    } else if (value instanceof Float || value instanceof Double) {
        conf.setFloat(key, ((Number) value).floatValue());
    } else if (value instanceof String) {
        conf.set(key, value.toString());
    } else if (value instanceof Class) {
        conf.set(key, ((Class) value).getName());
    } else {
        throw new IllegalArgumentException("Don't know how to handle option key: " + key
                + ", value: " + value + ", value type: " + value.getClass());
    }
}
From source file:org.apache.giraph.partition.SimpleRangePartitionFactoryTest.java
License:Apache License
private void testRange(int numWorkers, int keySpaceSize, int allowedWorkerDiff, boolean emptyWorkers) {
    Configuration conf = new Configuration();
    conf.setLong(GiraphConstants.PARTITION_VERTEX_KEY_SPACE_SIZE, keySpaceSize);
    SimpleLongRangePartitionerFactory<Writable, Writable> factory =
            new SimpleLongRangePartitionerFactory<Writable, Writable>();
    factory.setConf(new ImmutableClassesGiraphConfiguration(conf));

    ArrayList<WorkerInfo> infos = new ArrayList<WorkerInfo>();
    for (int i = 0; i < numWorkers; i++) {
        WorkerInfo info = new WorkerInfo();
        info.setInetSocketAddress(new InetSocketAddress(8080));
        info.setTaskId(i);
        infos.add(info);
    }

    Collection<PartitionOwner> owners =
            factory.createMasterGraphPartitioner().createInitialPartitionOwners(infos, -1);

    int[] tasks = new int[owners.size()];
    for (PartitionOwner owner : owners) {
        WorkerInfo worker = owner.getWorkerInfo();
        assertEquals(0, tasks[owner.getPartitionId()]);
        tasks[owner.getPartitionId()] = worker.getTaskId() + 1;
    }
    checkMapping(tasks, allowedWorkerDiff, emptyWorkers);

    WorkerGraphPartitioner<LongWritable, Writable, Writable> workerPartitioner =
            factory.createWorkerGraphPartitioner();
    workerPartitioner.updatePartitionOwners(null, owners);

    LongWritable longWritable = new LongWritable();
    int[] partitions = new int[keySpaceSize];
    for (int i = 0; i < keySpaceSize; i++) {
        longWritable.set(i);
        PartitionOwner owner = workerPartitioner.getPartitionOwner(longWritable);
        partitions[i] = owner.getPartitionId();
    }
    checkMapping(partitions, 1, emptyWorkers);
}
From source file:org.apache.hadoop.examples.BaileyBorweinPlouffe.java
License:Apache License
/** Create and setup a job */
private static Job createJob(String name, Configuration conf) throws IOException {
    final Job job = Job.getInstance(conf, NAME + "_" + name);
    final Configuration jobconf = job.getConfiguration();
    job.setJarByClass(BaileyBorweinPlouffe.class);

    // setup mapper
    job.setMapperClass(BbpMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);

    // setup reducer
    job.setReducerClass(BbpReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setNumReduceTasks(1);

    // setup input
    job.setInputFormatClass(BbpInputFormat.class);

    // disable task timeout
    jobconf.setLong(MRJobConfig.TASK_TIMEOUT, 0);

    // do not use speculative execution
    jobconf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
    jobconf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
    return job;
}
From source file:org.apache.hadoop.examples.pi.DistSum.java
License:Apache License
/** Create a job */
private Job createJob(String name, Summation sigma) throws IOException {
    final Job job = Job.getInstance(getConf(), parameters.remoteDir + "/" + name);
    final Configuration jobconf = job.getConfiguration();
    job.setJarByClass(DistSum.class);
    jobconf.setInt(N_PARTS, parameters.nParts);
    SummationWritable.write(sigma, DistSum.class, jobconf);

    // disable task timeout
    jobconf.setLong(MRJobConfig.TASK_TIMEOUT, 0);

    // do not use speculative execution
    jobconf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
    jobconf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
    return job;
}
From source file:org.apache.hadoop.examples.RandomTextWriter.java
License:Apache License
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 *
 * @throws IOException
 */
public int run(String[] args) throws Exception {
    if (args.length == 0) {
        return printUsage();
    }

    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
    long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
        System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0");
        return -2;
    }
    long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
            numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
        numMaps = 1;
        conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
    }
    conf.setInt(MRJobConfig.NUM_MAPS, numMaps);

    Job job = Job.getInstance(conf);
    job.setJarByClass(RandomTextWriter.class);
    job.setJobName("random-text-writer");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RandomWriter.RandomInputFormat.class);
    job.setMapperClass(RandomTextMapper.class);

    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));

    System.out.println("Running " + numMaps + " maps.");

    // reducer NONE
    job.setNumReduceTasks(0);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}
From source file:org.apache.hadoop.examples.RandomWriter.java
License:Apache License
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 *
 * @throws IOException
 */
public int run(String[] args) throws Exception {
    if (args.length == 0) {
        System.out.println("Usage: writer <out-dir>");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    Path outDir = new Path(args[0]);
    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
    long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
        System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0");
        return -2;
    }
    long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
            numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
        numMaps = 1;
        conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
    }
    conf.setInt(MRJobConfig.NUM_MAPS, numMaps);

    Job job = Job.getInstance(conf);
    job.setJarByClass(RandomWriter.class);
    job.setJobName("random-writer");
    FileOutputFormat.setOutputPath(job, outDir);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setInputFormatClass(RandomInputFormat.class);
    job.setMapperClass(RandomMapper.class);
    job.setReducerClass(Reducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    System.out.println("Running " + numMaps + " maps.");

    // reducer NONE
    job.setNumReduceTasks(0);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}
From source file:org.apache.jena.hadoop.rdf.io.input.AbstractNodeTupleInputFormatTests.java
License:Apache License
/**
 * Tests for input splitting
 *
 * @throws IOException
 * @throws InterruptedException
 */
@Test
public final void split_input_02() throws IOException, InterruptedException {
    Assume.assumeTrue(this.canSplitInputs());

    Configuration config = this.prepareConfiguration();
    config.setBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, false);
    config.setLong(NLineInputFormat.LINES_PER_MAP, 10);
    Assert.assertEquals(Integer.MAX_VALUE,
            config.getInt(HadoopIOConstants.MAX_LINE_LENGTH, Integer.MAX_VALUE));

    this.testSplitInputs(config, new File[] { small }, 10, SMALL_SIZE);
}