List of usage examples for org.apache.hadoop.conf Configuration getLong
public long getLong(String name, long defaultValue)
Gets the value of the name property as a long. If no such property exists, the provided defaultValue is returned.
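Before the full examples below, a minimal sketch of the typical pattern may help: read a numeric setting with getLong and fall back to a supplied default when the property is absent. The property name example.cache.size.bytes, the class name, and the values are hypothetical, chosen only for illustration.

import org.apache.hadoop.conf.Configuration;

public class GetLongSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();

    // Hypothetical key: nothing sets it, so getLong returns the supplied default.
    long cacheSize = conf.getLong("example.cache.size.bytes", 64L * 1024 * 1024);
    System.out.println("cache size = " + cacheSize); // 67108864

    // A value set programmatically (or in *-site.xml) takes precedence over the default.
    conf.setLong("example.cache.size.bytes", 128L * 1024 * 1024);
    System.out.println("cache size = " + conf.getLong("example.cache.size.bytes", 0L)); // 134217728
  }
}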
From source file: com.netflix.bdp.s3mper.listing.ConsistentListingAspect.java
License: Apache License

private void updateConfig(Configuration conf) {
  disabled = conf.getBoolean("s3mper.disable", disabled);

  if (disabled) {
    log.warn("S3mper Consistency explicitly disabled.");
    return;
  }

  darkload = conf.getBoolean("s3mper.darkload", darkload);
  failOnError = conf.getBoolean("s3mper.failOnError", failOnError);
  taskFailOnError = conf.getBoolean("s3mper.task.failOnError", taskFailOnError);
  checkTaskListings = conf.getBoolean("s3mper.listing.task.check", checkTaskListings);
  failOnTimeout = conf.getBoolean("s3mper.failOnTimeout", failOnTimeout);
  delistDeleteMarkedFiles = conf.getBoolean("s3mper.listing.delist.deleted", delistDeleteMarkedFiles);
  trackDirectories = conf.getBoolean("s3mper.listing.directory.tracking", trackDirectories);

  fileThreshold = conf.getFloat("s3mper.listing.threshold", fileThreshold);

  recheckCount = conf.getLong("s3mper.listing.recheck.count", recheckCount);
  recheckPeriod = conf.getLong("s3mper.listing.recheck.period", recheckPeriod);
  taskRecheckCount = conf.getLong("s3mper.listing.task.recheck.count", taskRecheckCount);
  taskRecheckPeriod = conf.getLong("s3mper.listing.task.recheck.period", taskRecheckPeriod);

  statOnMissingFile = conf.getBoolean("s3mper.listing.statOnMissingFile", false);
}
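The keys read above are ordinary Configuration properties, so a caller can override the built-in defaults before the aspect runs updateConfig. A minimal sketch, assuming hypothetical override values (only the key names come from the example above; the class name and numbers are invented for illustration):

import org.apache.hadoop.conf.Configuration;

public class S3mperTuningSketch {
  /** Returns a copy of base carrying hypothetical s3mper overrides; any key left
   *  unset falls back to the defaults already held by the aspect's fields. */
  static Configuration withRecheckOverrides(Configuration base) {
    Configuration conf = new Configuration(base);
    conf.setLong("s3mper.listing.recheck.count", 20L);    // read back via getLong above
    conf.setLong("s3mper.listing.recheck.period", 1000L); // illustrative value; unit is whatever s3mper expects
    conf.setBoolean("s3mper.listing.task.check", true);
    return conf;
  }
}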
From source file: com.netflix.bdp.s3mper.metastore.impl.DynamoDBMetastore.java
License: Apache License

/**
 * Creates the metastore table in DynamoDB if it doesn't exist with the configured
 * read and write units.
 *
 * @param uri
 * @param conf
 * @throws Exception
 */
@Override
public void initalize(URI uri, Configuration conf) throws Exception {
  scheme = uri.getScheme();

  String keyId = conf.get("fs." + uri.getScheme() + ".awsAccessKeyId");
  String keySecret = conf.get("fs." + uri.getScheme() + ".awsSecretAccessKey");

  // An override option for accessing across accounts
  keyId = conf.get("s3mper.override.awsAccessKeyId", keyId);
  keySecret = conf.get("s3mper.override.awsSecretAccessKey", keySecret);

  db = new AmazonDynamoDBClient(new BasicAWSCredentials(keyId, keySecret));

  readUnits = conf.getLong("s3mper.metastore.read.units", readUnits);
  writeUnits = conf.getLong("s3mper.metastore.write.units", writeUnits);

  retryCount = conf.getInt("s3mper.metastore.retry", retryCount);
  timeout = conf.getInt("s3mper.metastore.timeout", timeout);

  tableName = conf.get("s3mper.metastore.name", tableName);

  deleteMarkerEnabled = conf.getBoolean("s3mper.metastore.deleteMarker.enabled", false);

  boolean checkTableExists = conf.getBoolean("s3mper.metastore.create", false);

  if (checkTableExists) {
    ListTablesResult tables = db.listTables();

    if (!tables.getTableNames().contains(tableName)) {
      createTable();
    }
  }
}
From source file: com.neusoft.hbase.test.hadoop.dataload.HFileOutputFormat2.java
License: Apache License

static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createRecordWriter(
    final TaskAttemptContext context) throws IOException, InterruptedException {
  // Get the path of the temporary output file
  final Path outputPath = FileOutputFormat.getOutputPath(context);
  final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
  final Configuration conf = context.getConfiguration();
  final FileSystem fs = outputdir.getFileSystem(conf);
  // These configs. are from hbase-*.xml
  final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
  // Invented config. Add to hbase-*.xml if other than default compression.
  final String defaultCompression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
  final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
      false);

  // create a map from column family to the compression algorithm
  final Map<byte[], String> compressionMap = createFamilyCompressionMap(conf);
  final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf);
  final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf);

  String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY);
  final HFileDataBlockEncoder encoder;
  if (dataBlockEncodingStr == null) {
    encoder = NoOpDataBlockEncoder.INSTANCE;
  } else {
    try {
      encoder = new HFileDataBlockEncoderImpl(DataBlockEncoding.valueOf(dataBlockEncodingStr));
    } catch (IllegalArgumentException ex) {
      throw new RuntimeException("Invalid data block encoding type configured for the param "
          + DATABLOCK_ENCODING_CONF_KEY + " : " + dataBlockEncodingStr);
    }
  }

  return new RecordWriter<ImmutableBytesWritable, V>() {
    // Map of families to writers and how much has been output on the writer.
    private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
        Bytes.BYTES_COMPARATOR);
    private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
    private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
    private boolean rollRequested = false;

    public void write(ImmutableBytesWritable row, V cell) throws IOException {
      KeyValue kv = KeyValueUtil.ensureKeyValue(cell);

      // null input == user explicitly wants to flush
      if (row == null && kv == null) {
        rollWriters();
        return;
      }

      byte[] rowKey = kv.getRow();
      long length = kv.getLength();
      byte[] family = kv.getFamily();
      WriterLength wl = this.writers.get(family);

      // If this is a new column family, verify that the directory exists
      if (wl == null) {
        fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
      }

      // If any of the HFiles for the column families has reached
      // maxsize, we need to roll all the writers
      if (wl != null && wl.written + length >= maxsize) {
        this.rollRequested = true;
      }

      // This can only happen once a row is finished though
      if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
        rollWriters();
      }

      // create a new HLog writer, if necessary
      if (wl == null || wl.writer == null) {
        wl = getNewWriter(family, conf);
      }

      // we now have the proper HLog writer. full steam ahead
      kv.updateLatestStamp(this.now);
      wl.writer.append(kv);
      wl.written += length;

      // Copy the row so we know when a row transition.
      this.previousRow = rowKey;
    }

    private void rollWriters() throws IOException {
      for (WriterLength wl : this.writers.values()) {
        if (wl.writer != null) {
          LOG.info("Writer=" + wl.writer.getPath()
              + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
          close(wl.writer);
        }
        wl.writer = null;
        wl.written = 0;
      }
      this.rollRequested = false;
    }

    /* Create a new StoreFile.Writer.
     * @param family
     * @return A WriterLength, containing a new StoreFile.Writer.
     * @throws IOException
     */
    private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
      WriterLength wl = new WriterLength();
      Path familydir = new Path(outputdir, Bytes.toString(family));
      String compression = compressionMap.get(family);
      compression = compression == null ? defaultCompression : compression;
      String bloomTypeStr = bloomTypeMap.get(family);
      BloomType bloomType = BloomType.NONE;
      if (bloomTypeStr != null) {
        bloomType = BloomType.valueOf(bloomTypeStr);
      }
      String blockSizeString = blockSizeMap.get(family);
      int blockSize = blockSizeString == null ? HConstants.DEFAULT_BLOCKSIZE
          : Integer.parseInt(blockSizeString);
      Configuration tempConf = new Configuration(conf);
      tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
      wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
          .withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR)
          .build();
      this.writers.put(family, wl);
      return wl;
    }

    private void close(final StoreFile.Writer w) throws IOException {
      if (w != null) {
        w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
        w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
            Bytes.toBytes(context.getTaskAttemptID().toString()));
        w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
        w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
        w.appendTrackedTimestampsToMetadata();
        w.close();
      }
    }

    public void close(TaskAttemptContext c) throws IOException, InterruptedException {
      for (WriterLength wl : this.writers.values()) {
        close(wl.writer);
      }
    }
  };
}
From source file: com.neusoft.hbase.test.hadoop.dataload.HFileOutputFormatBase.java
License: Apache License

// cellKeyValue
// static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createRecordWriter()
// getRecordWriter()
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(
    final TaskAttemptContext context) throws IOException, InterruptedException {
  // Get the path of the temporary output file
  final Path outputPath = FileOutputFormat.getOutputPath(context);
  final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
  final Path ignoreOutputPath = getDeleteRowKeyFile(outputPath);
  final Configuration conf = context.getConfiguration();
  final FileSystem fs = outputdir.getFileSystem(conf);
  // These configs. are from hbase-*.xml
  final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
  // Invented config. Add to hbase-*.xml if other than default compression.
  final String defaultCompression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
  final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
      false);

  // create a map from column family to the compression algorithm
  final Map<byte[], String> compressionMap = createFamilyCompressionMap(conf);
  final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf);
  final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf);

  String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY);
  final HFileDataBlockEncoder encoder;
  if (dataBlockEncodingStr == null) {
    encoder = NoOpDataBlockEncoder.INSTANCE;
  } else {
    try {
      encoder = new HFileDataBlockEncoderImpl(DataBlockEncoding.valueOf(dataBlockEncodingStr));
    } catch (IllegalArgumentException ex) {
      throw new RuntimeException("Invalid data block encoding type configured for the param "
          + DATABLOCK_ENCODING_CONF_KEY + " : " + dataBlockEncodingStr);
    }
  }

  return new RecordWriter<ImmutableBytesWritable, KeyValue>() { // VKeyValue
    // Map of families to writers and how much has been output on the writer.
    private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
        Bytes.BYTES_COMPARATOR);
    private final FSDataOutputStream dos = fs.create(ignoreOutputPath);
    private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
    private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
    private boolean rollRequested = false;

    public void write(ImmutableBytesWritable row, KeyValue kv) // V cellKeyValue kv
        throws IOException {
      // KeyValue kv = KeyValueUtil.ensureKeyValue(cell);

      // null input == user explicitly wants to flush
      if (row == null && kv == null) {
        rollWriters();
        return;
      }

      byte[] rowKey = kv.getRow();
      long length = kv.getLength();
      byte[] family = kv.getFamily();

      if (ignore(kv)) {
        byte[] readBuf = rowKey;
        dos.write(readBuf, 0, readBuf.length);
        dos.write(Bytes.toBytes("\n"));
        return;
      }

      WriterLength wl = this.writers.get(family);

      // If this is a new column family, verify that the directory exists
      if (wl == null) {
        fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
      }

      // If any of the HFiles for the column families has reached
      // maxsize, we need to roll all the writers
      if (wl != null && wl.written + length >= maxsize) {
        this.rollRequested = true;
      }

      // This can only happen once a row is finished though
      if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
        rollWriters();
      }

      // create a new HLog writer, if necessary
      if (wl == null || wl.writer == null) {
        wl = getNewWriter(family, conf);
      }

      // we now have the proper HLog writer. full steam ahead
      kv.updateLatestStamp(this.now);
      wl.writer.append(kv);
      wl.written += length;

      // Copy the row so we know when a row transition.
      this.previousRow = rowKey;
    }

    private void rollWriters() throws IOException {
      for (WriterLength wl : this.writers.values()) {
        if (wl.writer != null) {
          LOG.info("Writer=" + wl.writer.getPath()
              + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
          close(wl.writer);
        }
        wl.writer = null;
        wl.written = 0;
      }
      this.rollRequested = false;
    }

    /* Create a new StoreFile.Writer.
     * @param family
     * @return A WriterLength, containing a new StoreFile.Writer.
     * @throws IOException
     */
    private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
      WriterLength wl = new WriterLength();
      Path familydir = new Path(outputdir, Bytes.toString(family));
      String compression = compressionMap.get(family);
      compression = compression == null ? defaultCompression : compression;
      String bloomTypeStr = bloomTypeMap.get(family);
      BloomType bloomType = BloomType.NONE;
      if (bloomTypeStr != null) {
        bloomType = BloomType.valueOf(bloomTypeStr);
      }
      String blockSizeString = blockSizeMap.get(family);
      int blockSize = blockSizeString == null ? HConstants.DEFAULT_BLOCKSIZE
          : Integer.parseInt(blockSizeString);
      Configuration tempConf = new Configuration(conf);
      tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
      wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
          .withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR)
          .build();
      this.writers.put(family, wl);
      return wl;
    }

    private void close(final StoreFile.Writer w) throws IOException {
      if (w != null) {
        w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
        w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
            Bytes.toBytes(context.getTaskAttemptID().toString()));
        w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
        w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
        w.appendTrackedTimestampsToMetadata();
        w.close();
      }
    }

    public void close(TaskAttemptContext c) throws IOException, InterruptedException {
      dos.flush();
      dos.close();
      for (WriterLength wl : this.writers.values()) {
        close(wl.writer);
      }
    }
  };
}
From source file: com.odiago.flumebase.exec.BucketedAggregationElement.java
License: Apache License

public BucketedAggregationElement(FlowElementContext ctxt, AggregateNode aggregateNode) {
  super(ctxt, (Schema) aggregateNode.getAttr(PlanNode.OUTPUT_SCHEMA_ATTR));

  Configuration conf = aggregateNode.getConf();
  assert null != conf;

  mNumBuckets = conf.getInt(NUM_BUCKETS_KEY, DEFAULT_NUM_BUCKETS);
  mContinuousOutput = conf.getBoolean(CONTINUOUS_OUTPUT_KEY, DEFAULT_CONTINUOUS_OUTPUT);
  mMaxPriorEmitInterval = conf.getLong(MAX_PRIOR_EMIT_INTERVAL_KEY, DEFAULT_MAX_PRIOR_EMIT_INTERVAL);

  int slackTime = conf.getInt(SLACK_INTERVAL_KEY, DEFAULT_SLACK_INTERVAL);
  if (slackTime < 0) {
    mSlackTime = DEFAULT_SLACK_INTERVAL;
  } else {
    mSlackTime = slackTime;
  }

  assert mMaxPriorEmitInterval > 0;
  assert mMaxPriorEmitInterval > mSlackTime;

  List<TypedField> groupByFields = aggregateNode.getGroupByFields();
  if (null == groupByFields) {
    mGroupByFields = Collections.emptyList();
  } else {
    mGroupByFields = groupByFields;
  }

  mAggregateExprs = aggregateNode.getAggregateExprs();
  assert mAggregateExprs != null;
  mPropagateFields = aggregateNode.getPropagateFields();

  Expr windowExpr = aggregateNode.getWindowExpr();
  assert windowExpr.isConstant();
  try {
    mWindowSpec = (WindowSpec) windowExpr.eval(new EmptyEventWrapper());
    assert mWindowSpec.getRangeSpec().isConstant();
    mTimeSpan = (TimeSpan) mWindowSpec.getRangeSpec().eval(new EmptyEventWrapper());
  } catch (IOException ioe) {
    // The only way this can be thrown is if the window expr isn't actually constant.
    // This should not happen due to the assert above.
    LOG.error("Got IOException when calculating window width: " + ioe);
    throw new RuntimeException(ioe);
  }

  mBucketMap = new HashMap<Pair<Long, HashedEvent>, List<Bucket>>(mNumBuckets);
  mBucketsByGroup = new HashMap<HashedEvent, List<Pair<Long, List<Bucket>>>>();

  // Calculate the width of each bucket.
  mTimeModulus = mTimeSpan.getWidth() / mNumBuckets;
  if (mTimeModulus * mNumBuckets != mTimeSpan.getWidth()) {
    LOG.warn("Aggregation time step does not cleanly divide the time interval; "
        + "results may be inaccurate. Set " + NUM_BUCKETS_KEY + " to a better divisor.");
  }
}
From source file: com.odiago.flumebase.exec.local.LocalEnvironment.java
License: Apache License

/**
 * Given a Configuration that has SUBMITTER_SESSION_ID_KEY set, return the
 * UserSession corresponding to this SessionId. This is used to resolve the
 * submitter of a LocalFlow, FlowSpecification, etc.
 */
private UserSession getSessionForConf(Configuration conf) {
  SessionId id = new SessionId(conf.getLong(SUBMITTER_SESSION_ID_KEY, -1));
  return getSession(id);
}
From source file: com.phantom.hadoop.examples.RandomTextWriter.java
License: Apache License

/**
 * This is the main routine for launching a distributed random write job. It
 * runs 10 maps/node and each node writes 1 gig of data to a DFS file. The
 * reduce doesn't do anything.
 *
 * @throws IOException
 */
public int run(String[] args) throws Exception {
  if (args.length == 0) {
    return printUsage();
  }

  Configuration conf = getConf();
  JobClient client = new JobClient(conf);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
  long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1 * 1024 * 1024 * 1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0");
    return -2;
  }
  long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
      numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);

  Job job = new Job(conf);

  job.setJarByClass(RandomTextWriter.class);
  job.setJobName("random-text-writer");

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  job.setInputFormatClass(RandomWriter.RandomInputFormat.class);
  job.setMapperClass(RandomTextMapper.class);

  Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
  List<String> otherArgs = new ArrayList<String>();
  for (int i = 0; i < args.length; ++i) {
    try {
      if ("-outFormat".equals(args[i])) {
        outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
      } else {
        otherArgs.add(args[i]);
      }
    } catch (ArrayIndexOutOfBoundsException except) {
      System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
      return printUsage(); // exits
    }
  }

  job.setOutputFormatClass(outputFormatClass);
  FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));

  System.out.println("Running " + numMaps + " maps.");

  // reducer NONE
  job.setNumReduceTasks(0);

  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");

  return ret;
}
From source file: com.phantom.hadoop.examples.RandomWriter.java
License: Apache License

/**
 * This is the main routine for launching a distributed random write job. It
 * runs 10 maps/node and each node writes 1 gig of data to a DFS file. The
 * reduce doesn't do anything.
 *
 * @throws IOException
 */
public int run(String[] args) throws Exception {
  if (args.length == 0) {
    System.out.println("Usage: writer <out-dir>");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  Path outDir = new Path(args[0]);
  Configuration conf = getConf();
  JobClient client = new JobClient(conf);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
  long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1 * 1024 * 1024 * 1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have" + BYTES_PER_MAP + " set to 0");
    return -2;
  }
  long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
      numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);

  Job job = new Job(conf);

  job.setJarByClass(RandomWriter.class);
  job.setJobName("random-writer");
  FileOutputFormat.setOutputPath(job, outDir);

  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);

  job.setInputFormatClass(RandomInputFormat.class);
  job.setMapperClass(RandomMapper.class);
  job.setReducerClass(Reducer.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  System.out.println("Running " + numMaps + " maps.");

  // reducer NONE
  job.setNumReduceTasks(0);

  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");

  return ret;
}
From source file: com.phantom.hadoop.examples.terasort.TeraInputFormat.java
License: Apache License

/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 *
 * @param job the job to sample
 * @param partFile where to write the output file to
 * @throws Throwable if something goes wrong
 */
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
  long t1 = System.currentTimeMillis();
  Configuration conf = job.getConfiguration();
  final TeraInputFormat inFormat = new TeraInputFormat();
  final TextSampler sampler = new TextSampler();
  int partitions = job.getNumReduceTasks();
  long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
  final List<InputSplit> splits = inFormat.getSplits(job);
  long t2 = System.currentTimeMillis();
  System.out.println("Computing input splits took " + (t2 - t1) + "ms");
  int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
  System.out.println("Sampling " + samples + " splits of " + splits.size());
  final long recordsPerSample = sampleSize / samples;
  final int sampleStep = splits.size() / samples;
  Thread[] samplerReader = new Thread[samples];
  SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
  // take N samples from different parts of the input
  for (int i = 0; i < samples; ++i) {
    final int idx = i;
    samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {
      {
        setDaemon(true);
      }

      public void run() {
        long records = 0;
        try {
          TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
              new TaskAttemptID());
          RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx),
              context);
          reader.initialize(splits.get(sampleStep * idx), context);
          while (reader.nextKeyValue()) {
            sampler.addKey(new Text(reader.getCurrentKey()));
            records += 1;
            if (recordsPerSample <= records) {
              break;
            }
          }
        } catch (IOException ie) {
          System.err.println("Got an exception while reading splits "
              + StringUtils.stringifyException(ie));
          throw new RuntimeException(ie);
        } catch (InterruptedException e) {
        }
      }
    };
    samplerReader[i].start();
  }
  FileSystem outFs = partFile.getFileSystem(conf);
  DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10,
      outFs.getDefaultBlockSize(partFile));
  for (int i = 0; i < samples; i++) {
    try {
      samplerReader[i].join();
      if (threadGroup.getThrowable() != null) {
        throw threadGroup.getThrowable();
      }
    } catch (InterruptedException e) {
    }
  }
  for (Text split : sampler.createPartitions(partitions)) {
    split.write(writer);
  }
  writer.close();
  long t3 = System.currentTimeMillis();
  System.out.println("Computing parititions took " + (t3 - t2) + "ms");
}
From source file: com.quantcast.qfs.hadoop.QFSImpl.java
License: Apache License

public QFSImpl(String metaServerHost, int metaServerPort, FileSystem.Statistics stats,
    Configuration cfg) throws IOException {
  kfsAccess = new KfsAccess(metaServerHost, metaServerPort);
  final long kMaxUserGroupId = 0x0FFFFFFFFL;
  final long kDefaultUser = ~0L;
  final long kDefaultGroup = ~0L;
  final long euser = cfg.getLong("fs.qfs.euser", kDefaultUser);
  final long egroup = cfg.getLong("fs.qfs.egroup", kDefaultGroup);
  final String groupsCfgName = "fs.qfs.egroups";
  final String groupsSeparator = ","; // No regex special symbols.
  final String groupsCfg = cfg.get(groupsCfgName, "");
  long[] groups = null;
  CREATE_PARAMS = cfg.get("fs.qfs.createParams", "S");
  if (kDefaultUser != euser && (euser < 0 || kMaxUserGroupId <= euser)) {
    throw new IOException("invalid effective user id: " + euser);
  }
  if (kDefaultGroup != egroup && (egroup < 0 || kMaxUserGroupId <= egroup)) {
    throw new IOException("invalid effective group id: " + egroup);
  }
  if (groupsCfg.contains(groupsSeparator)) {
    try {
      final String[] tokens = groupsCfg.split(groupsSeparator);
      if (0 < tokens.length) {
        groups = new long[tokens.length];
        for (int i = 0; i < tokens.length; i++) {
          groups[i] = Long.parseLong(tokens[i]);
          if (groups[i] < 0 || kMaxUserGroupId <= groups[i]) {
            throw new IOException("invalid group id: " + groups[i]);
          }
        }
      }
    } catch (Exception ex) {
      throw new IOException("failed to parse configuration setting " + groupsCfgName
          + " " + ex.getMessage());
    }
  }
  if (kDefaultUser != euser || kDefaultGroup != egroup || null != groups) {
    // Ignore errors for now.
    // Setting effective user and group has effect for all QFS file system
    // client instances withing the process / JVM.
    // If any other KfsAccess method invoked prior to this point
    // kfs_setEUserAndEGroup() will return an error.
    // Effective user and group ids have no effect with QFS authentication.
    kfsAccess.kfs_setEUserAndEGroup(euser, egroup, groups);
  }
  statistics = stats;
}