List of usage examples for java.io DataInputStream read
public final int read(byte b[], int off, int len) throws IOException
Reads up to len bytes of data from the contained input stream into an array of bytes.
From source file: com.splout.db.dnode.HttpFileExchanger.java
@Override public void handle(HttpExchange exchange) throws IOException { DataInputStream iS = null; FileOutputStream writer = null; File dest = null;//from w ww .j av a2s . c o m String tablespace = null; Integer partition = null; Long version = null; try { iS = new DataInputStream(new GZIPInputStream(exchange.getRequestBody())); String fileName = exchange.getRequestHeaders().getFirst("filename"); tablespace = exchange.getRequestHeaders().getFirst("tablespace"); partition = Integer.valueOf(exchange.getRequestHeaders().getFirst("partition")); version = Long.valueOf(exchange.getRequestHeaders().getFirst("version")); dest = new File( new File(tempDir, DNodeHandler.getLocalStoragePartitionRelativePath(tablespace, partition, version)), fileName); // just in case, avoid copying the same file concurrently // (but we also shouldn't avoid this in other levels of the app) synchronized (currentTransfersMonitor) { if (currentTransfers.containsKey(dest.toString())) { throw new IOException("Incoming file already being transferred - " + dest); } currentTransfers.put(dest.toString(), new Object()); } if (!dest.getParentFile().exists()) { dest.getParentFile().mkdirs(); } if (dest.exists()) { dest.delete(); } writer = new FileOutputStream(dest); byte[] buffer = new byte[config.getInt(FetcherProperties.DOWNLOAD_BUFFER)]; Checksum checkSum = new CRC32(); // 1- Read file size long fileSize = iS.readLong(); log.debug("Going to read file [" + fileName + "] of size: " + fileSize); // 2- Read file contents long readSoFar = 0; do { long missingBytes = fileSize - readSoFar; int bytesToRead = (int) Math.min(missingBytes, buffer.length); int read = iS.read(buffer, 0, bytesToRead); checkSum.update(buffer, 0, read); writer.write(buffer, 0, read); readSoFar += read; callback.onProgress(tablespace, partition, version, dest, fileSize, readSoFar); } while (readSoFar < fileSize); // 3- Read CRC long expectedCrc = iS.readLong(); if (expectedCrc == checkSum.getValue()) { log.info("File [" + 
dest.getAbsolutePath() + "] received -> Checksum -- " + checkSum.getValue() + " matches expected CRC [OK]"); callback.onFileReceived(tablespace, partition, version, dest); } else { log.error("File received [" + dest.getAbsolutePath() + "] -> Checksum -- " + checkSum.getValue() + " doesn't match expected CRC: " + expectedCrc); callback.onBadCRC(tablespace, partition, version, dest); dest.delete(); } } catch (Throwable t) { log.error(t); callback.onError(t, tablespace, partition, version, dest); if (dest != null && dest.exists() && !t.getMessage().contains("Incoming file already being transferred")) { dest.delete(); } } finally { if (writer != null) { writer.close(); } if (iS != null) { iS.close(); } if (dest != null) { currentTransfers.remove(dest.toString()); } } }
From source file:edu.cmu.lemurproject.WarcRecord.java
private static byte[] readNextRecord(DataInputStream in, StringBuffer headerBuffer) throws IOException { if (in == null) { return null; }/* w w w .j av a 2 s . c o m*/ if (headerBuffer == null) { return null; } String line = null; boolean foundMark = false; byte[] retContent = null; // cannot be using a buffered reader here!!!! // just read the header // first - find our WARC header while ((!foundMark) && ((line = readLineFromInputStream(in)) != null)) { if (line.startsWith(WARC_VERSION)) { WARC_VERSION_LINE = line; foundMark = true; } } // no WARC mark? if (!foundMark) { return null; } //LOG.info("Found WARC_VERSION"); int contentLength = -1; // read until we see contentLength then an empty line // (to handle malformed ClueWeb09 headers that have blank lines) // get the content length and set our retContent for (line = readLineFromInputStream(in).trim(); line.length() > 0 || contentLength < 0; line = readLineFromInputStream(in).trim()) { if (line.length() > 0) { headerBuffer.append(line); headerBuffer.append(LINE_ENDING); // find the content length designated by Content-Length: <length> String[] parts = line.split(":", 2); if (parts.length == 2 && parts[0].equals("Content-Length")) { try { contentLength = Integer.parseInt(parts[1].trim()); //LOG.info("WARC record content length: " + contentLength); } catch (NumberFormatException nfEx) { contentLength = -1; } } } } // now read the bytes of the content retContent = new byte[contentLength]; int totalWant = contentLength; int totalRead = 0; // // LOOP TO REMOVE LEADING CR * LF // To prevent last few characters from being cut off of the content // when reading // while ((totalRead == 0) && (totalRead < contentLength)) { byte CR = in.readByte(); byte LF = in.readByte(); if ((CR != 13) && (LF != 10)) { retContent[0] = CR; // Minor change to process Common Crawl WET files. // Handle conversion records with single character line endings. 
if (retContent.length > 1) { retContent[1] = LF; totalRead = 2; } else totalRead = 1; totalWant = contentLength - totalRead; } } // // // while (totalRead < contentLength) { try { int numRead = in.read(retContent, totalRead, totalWant); if (numRead < 0) { return null; } else { totalRead += numRead; totalWant = contentLength - totalRead; } // end if (numRead < 0) / else } catch (EOFException eofEx) { // resize to what we have if (totalRead > 0) { byte[] newReturn = new byte[totalRead]; System.arraycopy(retContent, 0, newReturn, 0, totalRead); return newReturn; } else { return null; } } // end try/catch (EOFException) } // end while (totalRead < contentLength) return retContent; }
From source file:org.lemurproject.galago.core.parse.WARCRecord.java
/**
 * Reads the next WARC record from the stream.
 *
 * <p>{@code findNextWARCRecord} positions the stream at the next record. The non-empty
 * header lines that follow are appended to {@code headerBuffer}, each followed by
 * {@code LINE_ENDING}, until an empty line is seen after a valid {@code Content-Length}
 * header. A record whose content length exceeds {@code MAX_CONTENT_LENGTH} is skipped and
 * parsing restarts at the following record. The content bytes are then read.
 *
 * @param in the stream to read from; must not be wrapped in a buffered reader
 * @param headerBuffer receives the header lines of the record that is returned
 * @return the record content (possibly shorter than announced if an EOFException interrupts
 *         the read), or {@code null} if either argument is null, no further WARC record is
 *         found, or the stream ends before the header or the content is complete
 * @throws IOException if reading from the stream fails
 */
private static byte[] readNextRecord(DataInputStream in, StringBuffer headerBuffer) throws IOException {
    if (in == null) {
        return null;
    }
    if (headerBuffer == null) {
        return null;
    }

    String line = null;
    byte[] retContent = null;

    boolean foundMark = findNextWARCRecord(in);

    // no WARC mark?
    if (!foundMark) {
        return null;
    }

    int contentLength = -1;
    // read until we see contentLength then an empty line
    // (to handle malformed ClueWeb09 headers that have blank lines)
    // get the content length and set our retContent
    while (true) {
        line = readLineFromInputStream(in);
        if (line == null) {
            // The stream ended inside the header: there is no complete record to return.
            return null;
        }
        line = line.trim();
        if (line.length() == 0 && contentLength >= 0) {
            break;
        }
        if (line.length() > 0) {
            headerBuffer.append(line);
            headerBuffer.append(LINE_ENDING);

            // find the content length designated by Content-Length: <length>
            String[] parts = line.split(":", 2);
            if (parts.length == 2 && parts[0].equals("Content-Length")) {
                try {
                    contentLength = Integer.parseInt(parts[1].trim());

                    // if this document is too long
                    if (contentLength > MAX_CONTENT_LENGTH) {
                        in.skip(contentLength);
                        if (!findNextWARCRecord(in)) {
                            return null;
                        }
                        headerBuffer.delete(0, headerBuffer.length());
                        // Forget the skipped record's length so the header loop keeps going
                        // until the next record announces its own Content-Length.
                        contentLength = -1;
                    }
                } catch (NumberFormatException nfEx) {
                    contentLength = -1;
                }
            }
        }
    }

    // now read the bytes of the content
    retContent = new byte[contentLength];
    int totalWant = contentLength;
    int totalRead = 0;

    //
    // LOOP TO REMOVE LEADING CR * LF
    // To prevent last few characters from being cut off of the content
    // when reading
    //
    while ((totalRead == 0) && (totalRead < contentLength)) {
        byte CR = in.readByte();
        byte LF = in.readByte();
        if ((CR != 13) && (LF != 10)) {
            retContent[0] = CR;
            // A one-byte record has no room for the second byte; storing it
            // unconditionally would throw ArrayIndexOutOfBoundsException.
            if (retContent.length > 1) {
                retContent[1] = LF;
                totalRead = 2;
            } else {
                totalRead = 1;
            }
            totalWant = contentLength - totalRead;
        }
    }

    while (totalRead < contentLength) {
        try {
            int numRead = in.read(retContent, totalRead, totalWant);
            if (numRead < 0) {
                return null;
            } else {
                totalRead += numRead;
                totalWant = contentLength - totalRead;
            }
        } catch (EOFException eofEx) {
            // resize to what we have
            if (totalRead > 0) {
                byte[] newReturn = new byte[totalRead];
                System.arraycopy(retContent, 0, newReturn, 0, totalRead);
                return newReturn;
            } else {
                return null;
            }
        }
    }

    return retContent;
}
From source file:com.chinamobile.bcbsp.partition.HashWithBalancerWritePartition.java
/** * This method is used to partition graph vertexes. Writing Each vertex to the * corresponding partition. In this method calls recordParse method to create * an HeadNode object. The last call partitioner's getPartitionId method to * calculate the HeadNode belongs to partition's id. If the HeadNode belongs * local partition then written to the local partition or send it to the * appropriate partition./*from w w w. j av a 2 s. c om*/ * @param recordReader The recordreader of the split. * @throws IOException The io exception * @throws InterruptedException The Interrupted Exception */ @Override public void write(RecordReader recordReader) throws IOException, InterruptedException { int headNodeNum = 0; int local = 0; int send = 0; int lost = 0; ThreadPool tpool = new ThreadPool(this.sendThreadNum); int staffNum = this.staff.getStaffNum(); BytesWritable kbytes = new BytesWritable(); int ksize = 0; BytesWritable vbytes = new BytesWritable(); int vsize = 0; DataOutputBuffer bb = new DataOutputBuffer(); int bufferSize = (int) ((this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER) * PART); int dataBufferSize = (this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER) / (this.staff.getStaffNum() + this.sendThreadNum); byte[] buffer = new byte[bufferSize]; int bufindex = 0; SerializationFactory sFactory = new SerializationFactory(new Configuration()); Serializer<IntWritable> psserializer = sFactory.getSerializer(IntWritable.class); byte[] pidandsize = new byte[TIME * CONTAINERNUMBER * CONTAINERNUMBER]; int psindex = 0; BytesWritable pidbytes = new BytesWritable(); int psize = 0; BytesWritable sizebytes = new BytesWritable(); int ssize = 0; try { this.keyserializer.open(bb); this.valueserializer.open(bb); psserializer.open(bb); } catch (IOException e) { throw e; } String path = "/tmp/bcbsp/" + this.staff.getJobID() + "/" + this.staff.getStaffID(); File dir = new File("/tmp/bcbsp/" + this.staff.getJobID()); dir.mkdir(); dir = new File("/tmp/bcbsp/" + this.staff.getJobID() 
+ "/" + this.staff.getStaffID()); dir.mkdir(); ArrayList<File> files = new ArrayList<File>(); try { File file = new File(path + "/" + "data" + ".txt"); files.add(file); DataOutputStream dataWriter = new DataOutputStream( new BufferedOutputStream(new FileOutputStream(path + "/" + "data" + ".txt", true))); DataInputStream dataReader = new DataInputStream( new BufferedInputStream(new FileInputStream(path + "/" + "data" + ".txt"))); File filet = new File(path + "/" + "pidandsize" + ".txt"); files.add(filet); DataOutputStream psWriter = new DataOutputStream( new BufferedOutputStream(new FileOutputStream(path + "/" + "pidandsize" + ".txt", true))); DataInputStream psReader = new DataInputStream( new BufferedInputStream(new FileInputStream(path + "/" + "pidandsize" + ".txt"))); while (recordReader != null && recordReader.nextKeyValue()) { headNodeNum++; Text key = new Text(recordReader.getCurrentKey().toString()); Text value = new Text(recordReader.getCurrentValue().toString()); int pid = -1; Text vertexID = this.recordParse.getVertexID(key); if (vertexID != null) { pid = this.partitioner.getPartitionID(vertexID); } else { lost++; continue; } if (this.counter.containsKey(pid)) { this.counter.put(pid, (this.counter.get(pid) + 1)); } else { this.counter.put(pid, 1); } bb.reset(); this.keyserializer.serialize(key); kbytes.set(bb.getData(), 0, bb.getLength()); ksize = kbytes.getLength(); bb.reset(); this.valueserializer.serialize(value); vbytes.set(bb.getData(), 0, bb.getLength()); vsize = vbytes.getLength(); bb.reset(); psserializer.serialize(new IntWritable(ksize + vsize)); sizebytes.set(bb.getData(), 0, bb.getLength()); ssize = sizebytes.getLength(); bb.reset(); psserializer.serialize(new IntWritable(pid)); pidbytes.set(bb.getData(), 0, bb.getLength()); psize = pidbytes.getLength(); if ((pidandsize.length - psindex) > (ssize + psize)) { System.arraycopy(sizebytes.getBytes(), 0, pidandsize, psindex, ssize); psindex += ssize; System.arraycopy(pidbytes.getBytes(), 0, 
pidandsize, psindex, psize); psindex += psize; } else { psWriter.write(pidandsize, 0, psindex); psindex = 0; System.arraycopy(sizebytes.getBytes(), 0, pidandsize, psindex, ssize); psindex += ssize; System.arraycopy(pidbytes.getBytes(), 0, pidandsize, psindex, psize); psindex += psize; } if ((buffer.length - bufindex) > (ksize + vsize)) { System.arraycopy(kbytes.getBytes(), 0, buffer, bufindex, ksize); bufindex += ksize; System.arraycopy(vbytes.getBytes(), 0, buffer, bufindex, vsize); bufindex += vsize; } else if (buffer.length < (ksize + vsize)) { dataWriter.write(buffer, 0, bufindex); bufindex = 0; LOG.info("This is a super record"); dataWriter.write(kbytes.getBytes(), 0, ksize); dataWriter.write(vbytes.getBytes(), 0, vsize); } else { dataWriter.write(buffer, 0, bufindex); bufindex = 0; System.arraycopy(kbytes.getBytes(), 0, buffer, bufindex, ksize); bufindex += ksize; System.arraycopy(vbytes.getBytes(), 0, buffer, bufindex, vsize); bufindex += vsize; } } if (psindex != 0) { psWriter.write(pidandsize, 0, psindex); } if (bufindex != 0) { dataWriter.write(buffer, 0, bufindex); bufindex = 0; } dataWriter.close(); dataWriter = null; psWriter.close(); psWriter = null; buffer = null; pidandsize = null; this.ssrc.setDirFlag(new String[] { "3" }); this.ssrc.setCounter(this.counter); HashMap<Integer, Integer> hashBucketToPartition = this.sssc.loadDataInBalancerBarrier(ssrc, Constants.PARTITION_TYPE.HASH); this.staff.setHashBucketToPartition(hashBucketToPartition); byte[][] databuf = new byte[staffNum][dataBufferSize]; int[] databufindex = new int[staffNum]; try { IntWritable pid = new IntWritable(); IntWritable size = new IntWritable(); int belongPid = 0; while (true) { size.readFields(psReader); pid.readFields(psReader); belongPid = hashBucketToPartition.get(pid.get()); if (belongPid != this.staff.getPartition()) { send++; } else { local++; } if ((databuf[belongPid].length - databufindex[belongPid]) > size.get()) { dataReader.read(databuf[belongPid], 
databufindex[belongPid], size.get()); databufindex[belongPid] += size.get(); } else if (databuf[belongPid].length < size.get()) { LOG.info("This is a super record"); byte[] tmp = new byte[size.get()]; dataReader.read(tmp, 0, size.get()); if (belongPid == this.staff.getPartition()) { DataInputStream reader = new DataInputStream( new BufferedInputStream(new ByteArrayInputStream(tmp))); try { boolean stop = true; while (stop) { Text key = new Text(); key.readFields(reader); Text value = new Text(); value.readFields(reader); if (key.getLength() > 0 && value.getLength() > 0) { Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString()); if (vertex == null) { lost++; continue; } this.staff.getGraphData().addForAll(vertex); } else { stop = false; } } } catch (IOException e) { LOG.info("IO exception: " + e.getStackTrace()); } } else { ThreadSignle t = tpool.getThread(); while (t == null) { t = tpool.getThread(); } t.setWorker( this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), belongPid)); t.setJobId(staff.getJobID()); t.setTaskId(staff.getStaffID()); t.setBelongPartition(belongPid); BytesWritable data = new BytesWritable(); data.set(tmp, 0, size.get()); t.setData(data); LOG.info("Using Thread is: " + t.getThreadNumber()); t.setStatus(true); } tmp = null; } else { if (belongPid == this.staff.getPartition()) { DataInputStream reader = new DataInputStream(new BufferedInputStream( new ByteArrayInputStream(databuf[belongPid], 0, databufindex[belongPid]))); try { boolean stop = true; while (stop) { Text key = new Text(); key.readFields(reader); Text value = new Text(); value.readFields(reader); if (key.getLength() > 0 && value.getLength() > 0) { Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString()); if (vertex == null) { lost++; continue; } this.staff.getGraphData().addForAll(vertex); } else { stop = false; } } } catch (IOException e) { LOG.info("IO exception: " + e.getStackTrace()); } } else { ThreadSignle t = 
tpool.getThread(); while (t == null) { t = tpool.getThread(); } t.setWorker( this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), belongPid)); t.setJobId(staff.getJobID()); t.setTaskId(staff.getStaffID()); t.setBelongPartition(belongPid); BytesWritable data = new BytesWritable(); data.set(databuf[belongPid], 0, databufindex[belongPid]); t.setData(data); LOG.info("Using Thread is: " + t.getThreadNumber()); t.setStatus(true); } databufindex[belongPid] = 0; dataReader.read(databuf[belongPid], databufindex[belongPid], size.get()); databufindex[belongPid] += size.get(); } } } catch (EOFException ex) { LOG.error("[write]", ex); } for (int i = 0; i < staffNum; i++) { if (databufindex[i] != 0) { if (i == this.staff.getPartition()) { DataInputStream reader = new DataInputStream( new BufferedInputStream(new ByteArrayInputStream(databuf[i], 0, databufindex[i]))); try { boolean stop = true; while (stop) { Text key = new Text(); key.readFields(reader); Text value = new Text(); value.readFields(reader); if (key.getLength() > 0 && value.getLength() > 0) { Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString()); if (vertex == null) { lost++; continue; } this.staff.getGraphData().addForAll(vertex); } else { stop = false; } } } catch (IOException e) { LOG.info("IO exception: " + e.getStackTrace()); } } else { ThreadSignle t = tpool.getThread(); while (t == null) { t = tpool.getThread(); } t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), i)); t.setJobId(staff.getJobID()); t.setTaskId(staff.getStaffID()); t.setBelongPartition(i); BytesWritable data = new BytesWritable(); data.set(databuf[i], 0, databufindex[i]); t.setData(data); LOG.info("Using Thread is: " + t.getThreadNumber()); t.setStatus(true); } } } dataReader.close(); dataReader = null; psReader.close(); psReader = null; for (File f : files) { f.delete(); } dir.delete(); dir = new File(path.substring(0, path.lastIndexOf('/'))); dir.delete(); tpool.cleanup(); tpool = 
null; databuf = null; databufindex = null; this.counter = null; LOG.info("The number of vertices that were read from the input file: " + headNodeNum); LOG.info("The number of vertices that were put into the partition: " + local); LOG.info("The number of vertices that were sent to other partitions: " + send); LOG.info("The number of verteices in the partition that cound not be " + "parsed:" + lost); } catch (IOException e) { throw e; } catch (InterruptedException e) { throw e; } finally { for (File f : files) { f.delete(); } dir.delete(); dir = new File(path.substring(0, path.lastIndexOf('/'))); dir.delete(); } }