Java tutorial: Voldemort's read-only store HadoopStoreWriter
/*
 * Copyright 2008-2009 LinkedIn, Inc
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package voldemort.store.readonly.disk;

import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.Locale;
import java.util.Set;
import java.util.zip.GZIPOutputStream;

import com.google.common.collect.Sets;

import org.apache.commons.codec.binary.Hex;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.apache.log4j.Logger;

import voldemort.VoldemortException;
import voldemort.annotations.concurrency.NotThreadsafe;
import voldemort.store.readonly.ReadOnlyStorageMetadata;
import voldemort.store.readonly.ReadOnlyUtils;
import voldemort.store.readonly.checksum.CheckSum;
import voldemort.store.readonly.checksum.CheckSum.CheckSumType;
import voldemort.store.readonly.checksum.CheckSumMetadata;
import voldemort.store.readonly.mr.AbstractStoreBuilderConfigurable;
import voldemort.store.readonly.mr.HadoopStoreBuilder;
import voldemort.store.readonly.mr.azkaban.VoldemortBuildAndPushJob;
import voldemort.utils.ByteUtils;

/**
 * Takes the md5-hashed keys and serialized values coming out of the shuffle and
 * writes them into per-chunk index and data files (plus checksum metadata) for a
 * Voldemort read-only store.
 */
public class HadoopStoreWriter extends AbstractStoreBuilderConfigurable implements
        KeyValueWriter<BytesWritable, BytesWritable> {

    private static final Logger logger = Logger.getLogger(HadoopStoreWriter.class);

    private Set<Integer> chunksHandled = Sets.newHashSet();

    private DataOutputStream[] indexFileStream = null;
    private DataOutputStream[] valueFileStream = null;
    private int[] position;
    private String taskId = null;

    private int nodeId = -1;
    private int partitionId = -1;
    private int replicaType = -1;

    private Path[] taskIndexFileName;
    private Path[] taskValueFileName;

    private JobConf conf;
    private CheckSumType checkSumType;
    private CheckSum[] checkSumDigestIndex;
    private CheckSum[] checkSumDigestValue;

    private String outputDir;

    private FileSystem fs;

    private boolean isValidCompressionEnabled;

    /**
     * This variable is used to figure out the file extension for index and data
     * files. When the server supports compression, this variable's value is
     * typically ".gz"; otherwise it holds an empty string.
     */
    private String fileExtension;

    /** Used to track the size of the index files produced by the reducer */
    private long[] indexFileSizeInBytes;

    /** Used to track the size of the data files produced by the reducer */
    private long[] valueFileSizeInBytes;

    public HadoopStoreWriter() {}

    @Override
    public void conf(JobConf job) {
        super.configure(job);

        this.conf = job;
        this.position = new int[getNumChunks()];
        this.outputDir = job.get("final.output.dir");
        this.taskId = job.get("mapred.task.id");
        this.checkSumType = CheckSum.fromString(job.get(VoldemortBuildAndPushJob.CHECKSUM_TYPE));

        // These arrays are sparse if reducer.per.bucket is false and num.chunks > 1
        this.checkSumDigestIndex = new CheckSum[getNumChunks()];
        this.checkSumDigestValue = new CheckSum[getNumChunks()];
        this.taskIndexFileName = new Path[getNumChunks()];
        this.taskValueFileName = new Path[getNumChunks()];
        this.indexFileStream = new DataOutputStream[getNumChunks()];
        this.valueFileStream = new DataOutputStream[getNumChunks()];
        this.indexFileSizeInBytes = new long[getNumChunks()];
        this.valueFileSizeInBytes = new long[getNumChunks()];

        String compressionCodec = conf.get(VoldemortBuildAndPushJob.REDUCER_OUTPUT_COMPRESS_CODEC,
                                           NO_COMPRESSION_CODEC);
        if (conf.getBoolean(VoldemortBuildAndPushJob.REDUCER_OUTPUT_COMPRESS, false)
           && compressionCodec.toUpperCase(Locale.ENGLISH).equals(COMPRESSION_CODEC)) {
            this.fileExtension = GZIP_FILE_EXTENSION;
            this.isValidCompressionEnabled = true;
        } else {
            this.fileExtension = "";
            this.isValidCompressionEnabled = false;
        }
    }

    /**
     * The MapReduce framework should operate sequentially, so thread safety
     * shouldn't be a problem.
     */
    @NotThreadsafe
    private void initFileStreams(int chunkId) {
        /**
         * {@link Set#add(Object)} returns false if the element already existed in
         * the set. This ensures we initialize the resources for each chunk only
         * once.
         */
        if (chunksHandled.add(chunkId)) {
            try {
                this.indexFileSizeInBytes[chunkId] = 0L;
                this.valueFileSizeInBytes[chunkId] = 0L;
                this.checkSumDigestIndex[chunkId] = CheckSum.getInstance(checkSumType);
                this.checkSumDigestValue[chunkId] = CheckSum.getInstance(checkSumType);
                this.position[chunkId] = 0;
                this.taskIndexFileName[chunkId] = new Path(FileOutputFormat.getOutputPath(conf),
                                                           getStoreName() + "."
                                                                   + Integer.toString(chunkId)
                                                                   + "_" + this.taskId
                                                                   + INDEX_FILE_EXTENSION
                                                                   + fileExtension);
                this.taskValueFileName[chunkId] = new Path(FileOutputFormat.getOutputPath(conf),
                                                           getStoreName() + "."
                                                                   + Integer.toString(chunkId)
                                                                   + "_" + this.taskId
                                                                   + DATA_FILE_EXTENSION
                                                                   + fileExtension);

                if (this.fs == null)
                    this.fs = this.taskIndexFileName[chunkId].getFileSystem(conf);

                if (isValidCompressionEnabled) {
                    this.indexFileStream[chunkId] = new DataOutputStream(new BufferedOutputStream(
                            new GZIPOutputStream(fs.create(this.taskIndexFileName[chunkId]),
                                                 DEFAULT_BUFFER_SIZE)));
                    this.valueFileStream[chunkId] = new DataOutputStream(new BufferedOutputStream(
                            new GZIPOutputStream(fs.create(this.taskValueFileName[chunkId]),
                                                 DEFAULT_BUFFER_SIZE)));
                } else {
                    this.indexFileStream[chunkId] = fs.create(this.taskIndexFileName[chunkId]);
                    this.valueFileStream[chunkId] = fs.create(this.taskValueFileName[chunkId]);
                }

                fs.setPermission(this.taskIndexFileName[chunkId],
                                 new FsPermission(HadoopStoreBuilder.HADOOP_FILE_PERMISSION));
                logger.info("Setting permission to 755 for " + this.taskIndexFileName[chunkId]);
                fs.setPermission(this.taskValueFileName[chunkId],
                                 new FsPermission(HadoopStoreBuilder.HADOOP_FILE_PERMISSION));
                logger.info("Setting permission to 755 for " + this.taskValueFileName[chunkId]);

                logger.info("Opening " + this.taskIndexFileName[chunkId] + " and "
                            + this.taskValueFileName[chunkId] + " for writing.");
            } catch (IOException e) {
                throw new RuntimeException("Failed to open Input/OutputStream", e);
            }
        }
    }

    @Override
    public void write(BytesWritable key, Iterator<BytesWritable> iterator, Reporter reporter)
            throws IOException {

        // Read chunk id
        int chunkId = ReadOnlyUtils.chunk(key.getBytes(), getNumChunks());
        initFileStreams(chunkId);

        // Write key and position
        this.indexFileStream[chunkId].write(key.getBytes(), 0, key.getLength());
        this.indexFileSizeInBytes[chunkId] += key.getLength();
        this.indexFileStream[chunkId].writeInt(this.position[chunkId]);
        this.indexFileSizeInBytes[chunkId] += ByteUtils.SIZE_OF_INT;

        // Run key through checksum digest
        if (this.checkSumDigestIndex[chunkId] != null) {
            this.checkSumDigestIndex[chunkId].update(key.getBytes(), 0, key.getLength());
            this.checkSumDigestIndex[chunkId].update(this.position[chunkId]);
        }

        short numTuples = 0;
        ByteArrayOutputStream stream = new ByteArrayOutputStream();
        DataOutputStream valueStream = new DataOutputStream(stream);

        while (iterator.hasNext()) {
            BytesWritable writable = iterator.next();
            byte[] valueBytes = writable.getBytes();
            int offsetTillNow = 0;

            /**
             * Below, we read the node id, partition id and replica type of each
             * record coming in, and validate that it is consistent with the other
             * IDs seen so far. This is to catch potential regressions to the
             * shuffling logic in:
             *
             * {@link AbstractStoreBuilderConfigurable#getPartition(byte[], byte[], int)}
             */

            // Read node Id
            int currentNodeId = ByteUtils.readInt(valueBytes, offsetTillNow);
            if (this.nodeId == -1) {
                this.nodeId = currentNodeId;
            } else if (this.nodeId != currentNodeId) {
                throw new IllegalArgumentException("Should not get various nodeId shuffled to us! "
                                                   + "First nodeId seen: " + this.nodeId
                                                   + ", currentNodeId: " + currentNodeId);
            }
            offsetTillNow += ByteUtils.SIZE_OF_INT;

            // Read partition id
            int currentPartitionId = ByteUtils.readInt(valueBytes, offsetTillNow);
            if (this.partitionId == -1) {
                this.partitionId = currentPartitionId;
            } else if (this.partitionId != currentPartitionId) {
                throw new IllegalArgumentException("Should not get various partitionId shuffled to us! "
                                                   + "First partitionId seen: " + this.partitionId
                                                   + ", currentPartitionId: " + currentPartitionId);
            }
            offsetTillNow += ByteUtils.SIZE_OF_INT;

            // Read replica type
            if (getSaveKeys()) {
                int currentReplicaType = (int) ByteUtils.readBytes(valueBytes,
                                                                   offsetTillNow,
                                                                   ByteUtils.SIZE_OF_BYTE);
                if (this.replicaType == -1) {
                    this.replicaType = currentReplicaType;
                } else if (this.replicaType != currentReplicaType) {
                    throw new IllegalArgumentException("Should not get various replicaType shuffled to us! "
                                                       + "First replicaType seen: " + this.replicaType
                                                       + ", currentReplicaType: " + currentReplicaType);
                }

                if (getBuildPrimaryReplicasOnly() && this.replicaType > 0) {
                    throw new IllegalArgumentException("Should not get any replicaType > 0 shuffled to us"
                                                       + " when buildPrimaryReplicasOnly mode is enabled!");
                }

                offsetTillNow += ByteUtils.SIZE_OF_BYTE;
            }

            int valueLength = writable.getLength() - offsetTillNow;
            if (getSaveKeys()) {
                // Write ( key_length, value_length, key, value )
                valueStream.write(valueBytes, offsetTillNow, valueLength);
            } else {
                // Write ( value_length, value )
                valueStream.writeInt(valueLength);
                valueStream.write(valueBytes, offsetTillNow, valueLength);
            }

            numTuples++;

            // If we have multiple values for this md5 that is a collision,
            // throw an exception--either the data itself has duplicates, there
            // are trillions of keys, or someone is attempting something
            // malicious ( We obviously expect collisions when we save keys )
            if (!getSaveKeys() && numTuples > 1)
                throw new VoldemortException("Duplicate keys detected for md5 sum "
                                             + ByteUtils.toHexString(ByteUtils.copy(key.getBytes(),
                                                                                    0,
                                                                                    key.getLength())));
        }

        if (numTuples < 0) {
            // Overflow
            throw new VoldemortException("Found too many collisions: chunk " + chunkId
                                         + " has exceeded " + Short.MAX_VALUE + " collisions.");
        } else if (numTuples > 1) {
            // Update number of collisions + max keys per collision
            reporter.incrCounter(CollisionCounter.NUM_COLLISIONS, 1);

            long numCollisions = reporter.getCounter(CollisionCounter.MAX_COLLISIONS).getCounter();
            if (numTuples > numCollisions) {
                reporter.incrCounter(CollisionCounter.MAX_COLLISIONS, numTuples - numCollisions);
            }
        }

        // Flush the value
        valueStream.flush();
        byte[] value = stream.toByteArray();

        // Start writing to file now
        // First, if save keys flag set the number of keys
        if (getSaveKeys()) {
            this.valueFileStream[chunkId].writeShort(numTuples);
            this.valueFileSizeInBytes[chunkId] += ByteUtils.SIZE_OF_SHORT;
            this.position[chunkId] += ByteUtils.SIZE_OF_SHORT;

            if (this.checkSumDigestValue[chunkId] != null) {
                this.checkSumDigestValue[chunkId].update(numTuples);
            }
        }

        this.valueFileStream[chunkId].write(value);
        this.valueFileSizeInBytes[chunkId] += value.length;
        this.position[chunkId] += value.length;

        if (this.checkSumDigestValue[chunkId] != null) {
            this.checkSumDigestValue[chunkId].update(value);
        }

        if (this.position[chunkId] < 0)
            throw new VoldemortException("Chunk overflow exception: chunk " + chunkId
                                         + " has exceeded " + Integer.MAX_VALUE + " bytes.");
    }

    @Override
    public void close() throws IOException {

        for (int chunkId : chunksHandled) {
            this.indexFileStream[chunkId].close();
            this.valueFileStream[chunkId].close();
        }

        if (this.nodeId == -1 || this.partitionId == -1) {
            // Issue 258 - No data was read in the reduce phase, do not create
            // any output
            return;
        }

        // If the replica type read was not valid, shout out
        if (getSaveKeys() && this.replicaType == -1) {
            throw new RuntimeException("Could not read the replica type correctly for node "
                                       + nodeId + " ( partition - " + this.partitionId + " )");
        }

        String fileNamePrefix = null;
        if (getSaveKeys()) {
            fileNamePrefix = new String(Integer.toString(this.partitionId) + "_"
                                        + Integer.toString(this.replicaType) + "_");
        } else {
            fileNamePrefix = new String(Integer.toString(this.partitionId) + "_");
        }

        // Initialize the output directory
        String outputDirName;
        if (getBuildPrimaryReplicasOnly()) {
            outputDirName = "partition-" + this.partitionId;
        } else {
            outputDirName = "node-" + this.nodeId;
        }
        Path outputDir = new Path(this.outputDir, outputDirName);

        // Create output directory, if it doesn't exist
        FileSystem outputFs = outputDir.getFileSystem(this.conf);
        outputFs.mkdirs(outputDir);
        outputFs.setPermission(outputDir, new FsPermission(HadoopStoreBuilder.HADOOP_FILE_PERMISSION));
        logger.info("Setting permission to 755 for " + outputDir);

        // Write the checksum and output files
        for (int chunkId : chunksHandled) {

            String chunkFileName = fileNamePrefix + Integer.toString(chunkId);
            CheckSumMetadata indexCheckSum = new CheckSumMetadata();
            CheckSumMetadata valueCheckSum = new CheckSumMetadata();

            if (this.checkSumType != CheckSumType.NONE) {
                if (this.checkSumDigestIndex[chunkId] != null
                   && this.checkSumDigestValue[chunkId] != null) {
                    indexCheckSum.add(ReadOnlyStorageMetadata.CHECKSUM,
                                      new String(Hex.encodeHex(this.checkSumDigestIndex[chunkId].getCheckSum())));
                    valueCheckSum.add(ReadOnlyStorageMetadata.CHECKSUM,
                                      new String(Hex.encodeHex(this.checkSumDigestValue[chunkId].getCheckSum())));
                } else {
                    throw new RuntimeException("Failed to open checksum digest for node " + nodeId
                                               + " ( partition - " + this.partitionId + ", chunk - "
                                               + chunkId + " )");
                }
            }

            Path checkSumIndexFile = new Path(outputDir, chunkFileName + INDEX_FILE_EXTENSION
                                                         + CHECKSUM_FILE_EXTENSION);
            Path checkSumValueFile = new Path(outputDir, chunkFileName + DATA_FILE_EXTENSION
                                                         + CHECKSUM_FILE_EXTENSION);

            if (outputFs.exists(checkSumIndexFile)) {
                outputFs.delete(checkSumIndexFile, true);
            }
            FSDataOutputStream output = outputFs.create(checkSumIndexFile);
            outputFs.setPermission(checkSumIndexFile,
                                   new FsPermission(HadoopStoreBuilder.HADOOP_FILE_PERMISSION));
            indexCheckSum.add(CheckSumMetadata.INDEX_FILE_SIZE_IN_BYTES,
                              Long.toString(this.indexFileSizeInBytes[chunkId]));
            output.write(indexCheckSum.toJsonString().getBytes());
            output.close();

            if (outputFs.exists(checkSumValueFile)) {
                outputFs.delete(checkSumValueFile, true);
            }
            output = outputFs.create(checkSumValueFile);
            outputFs.setPermission(checkSumValueFile,
                                   new FsPermission(HadoopStoreBuilder.HADOOP_FILE_PERMISSION));
            valueCheckSum.add(CheckSumMetadata.DATA_FILE_SIZE_IN_BYTES,
                              Long.toString(this.valueFileSizeInBytes[chunkId]));
            output.write(valueCheckSum.toJsonString().getBytes());
            output.close();

            // Generate the final chunk files and add file size information
            Path indexFile = new Path(outputDir, chunkFileName + INDEX_FILE_EXTENSION + fileExtension);
            Path valueFile = new Path(outputDir, chunkFileName + DATA_FILE_EXTENSION + fileExtension);

            logger.info("Moving " + this.taskIndexFileName[chunkId] + " to " + indexFile);
            if (outputFs.exists(indexFile)) {
                outputFs.delete(indexFile, true);
            }
            fs.rename(taskIndexFileName[chunkId], indexFile);

            logger.info("Moving " + this.taskValueFileName[chunkId] + " to " + valueFile);
            if (outputFs.exists(valueFile)) {
                outputFs.delete(valueFile, true);
            }
            fs.rename(this.taskValueFileName[chunkId], valueFile);
        }
    }
}
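HadoopStoreWriter implements the KeyValueWriter contract shown above (conf, write, close), so in practice it is driven by a reducer rather than used directly. The sketch below illustrates that wiring with the old mapred API from the imports. It is a minimal sketch: the class name ReadOnlyStoreReducerSketch and the output types are illustrative only; the real build pipeline ships its own reducer classes.

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

import voldemort.store.readonly.disk.HadoopStoreWriter;

public class ReadOnlyStoreReducerSketch extends MapReduceBase
        implements Reducer<BytesWritable, BytesWritable, Text, Text> {

    private final HadoopStoreWriter writer = new HadoopStoreWriter();

    @Override
    public void configure(JobConf job) {
        // Sets up the per-chunk streams, checksums and (optional) gzip compression
        writer.conf(job);
    }

    @Override
    public void reduce(BytesWritable key,
                       Iterator<BytesWritable> values,
                       OutputCollector<Text, Text> output,
                       Reporter reporter) throws IOException {
        // Everything that hashed to this key arrives together; the writer appends
        // it to the index and data file of the key's chunk. Nothing is emitted
        // through the OutputCollector: the writer owns its own output files.
        writer.write(key, values, reporter);
    }

    @Override
    public void close() throws IOException {
        // Closes the chunk streams, writes the checksum metadata files and renames
        // the task-local files into the final node-/partition- directory.
        writer.close();
    }
}

Because the writer creates and renames its files itself, the job's configured OutputFormat sees no records; all durable output lives under "final.output.dir".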
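The validation at the top of write() assumes every shuffled value is prefixed with the node id, the partition id and, when keys are saved, the replica type, with the mapper's payload following that prefix. The sketch below shows how such a record could be assembled, for example in a unit test. It is a minimal sketch under assumptions: buildReducerValue is a hypothetical helper, and java.nio.ByteBuffer (big-endian by default) stands in for Voldemort's ByteUtils.

import java.nio.ByteBuffer;

import org.apache.hadoop.io.BytesWritable;

public class ReducerValueLayoutSketch {

    /**
     * Hypothetical helper: packs [nodeId:int][partitionId:int][replicaType:byte][payload]
     * into a BytesWritable, mirroring the prefix that HadoopStoreWriter.write()
     * strips off and validates. The payload is whatever the mapper emitted after
     * the prefix (key/value lengths and bytes when "save keys" is enabled).
     */
    public static BytesWritable buildReducerValue(int nodeId,
                                                  int partitionId,
                                                  byte replicaType,
                                                  byte[] payload) {
        ByteBuffer buffer = ByteBuffer.allocate(4 + 4 + 1 + payload.length);
        buffer.putInt(nodeId)       // consumed by ByteUtils.readInt(valueBytes, 0)
              .putInt(partitionId)  // consumed by ByteUtils.readInt(valueBytes, 4)
              .put(replicaType)     // read only when getSaveKeys() is true
              .put(payload);
        return new BytesWritable(buffer.array());
    }
}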