Java tutorial
/* * Copyright 2013 Websquared, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fastcatsearch.ir.document; import org.apache.commons.io.input.BoundedInputStream; import org.apache.lucene.util.BytesRef; import org.fastcatsearch.ir.common.IRException; import org.fastcatsearch.ir.common.IndexFileNames; import org.fastcatsearch.ir.config.IndexConfig; import org.fastcatsearch.ir.field.Field; import org.fastcatsearch.ir.field.FieldDataParseException; import org.fastcatsearch.ir.index.IndexWriteInfoList; import org.fastcatsearch.ir.index.WriteInfoLoggable; import org.fastcatsearch.ir.io.*; import org.fastcatsearch.ir.settings.FieldSetting; import org.fastcatsearch.ir.settings.SchemaSetting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.RandomAccessFile; import java.nio.channels.Channels; import java.util.List; import java.util.zip.Deflater; import java.util.zip.Inflater; import java.util.zip.InflaterInputStream; /** * ?. * ? Document? ? ? ?. * * ???? 2?? ???, ??? ? ??. * * append?? ? ? ? ?. 0. * ? DocumentReader? ?? ? ? docbaseNo? * ?. * * @author sangwook.song * */ public class DocumentWriter implements WriteInfoLoggable { private static Logger logger = LoggerFactory.getLogger(DocumentWriter.class); private List<FieldSetting> fields; private BufferedFileOutput docOutput; private BufferedFileOutput positionOutput; private byte[] workingBuffer; private BytesDataOutput fbaos; private int totalCount; //?. private Deflater compressor; private int count; // ? . private static final int INFLATE_BUFFER_INIT_SIZE = 20 * 1024; private ByteRefArrayOutputStream inflaterOutput; public DocumentWriter(SchemaSetting schemaSetting, File dir, IndexConfig indexConfig) throws IOException, IRException { compressor = new Deflater(Deflater.BEST_SPEED); fields = schemaSetting.getFieldSettingList(); docOutput = new BufferedFileOutput(dir, IndexFileNames.docStored); positionOutput = new BufferedFileOutput(dir, IndexFileNames.docPosition); fbaos = new BytesDataOutput(3 * 1024 * 1024); // 3Mb . workingBuffer = new byte[1024]; docOutput.writeInt(0); // document count inflaterOutput = new ByteRefArrayOutputStream(INFLATE_BUFFER_INIT_SIZE); } public int write(Document document) throws IOException, IRException { fbaos.reset(); long docStartPosition = docOutput.position(); positionOutput.writeLong(docStartPosition); for (int i = 0; i < document.size(); i++) { Field f = document.get(i); // null? ?? FieldSetting fieldSetting = fields.get(i); if (f == null || f.isNull() || !fieldSetting.isStore()) { fbaos.writeBoolean(false); } else { fbaos.writeBoolean(true); //TODO . ?? ?? . // fbaos.writeBoolean(fieldSetting.isCompress()); f.writeRawTo(fbaos); } } compressor.reset(); compressor.setInput(fbaos.array(), 0, (int) fbaos.position()); compressor.finish(); long pos = docOutput.position(); docOutput.writeInt(0); // ??? ?. int compressedDataLength = 0; while (!compressor.finished()) { int count = compressor.deflate(workingBuffer); docOutput.writeBytes(workingBuffer, 0, count); compressedDataLength += count; } long lastPos = docOutput.position(); // ?? ?? ?. docOutput.seek(pos); docOutput.writeInt(compressedDataLength); docOutput.seek(lastPos); count++; return totalCount++; } public Document readDocument(int docNo) throws IOException, IRException { long prevPosPos = positionOutput.position(); long docPos = -1; try { long positionOffset = ((long) docNo) * IOUtil.SIZE_OF_LONG; positionOutput.seek(positionOffset); docPos = IOUtil.readLong(positionOutput.getRaf()); } finally { positionOutput.seek(prevPosPos); } // find a document block long prevDocPos = docOutput.position(); try { docOutput.seek(docPos); RandomAccessFile raf = docOutput.getRaf(); int len = IOUtil.readInt(raf); long n = raf.getFilePointer(); InputStream docInput = Channels.newInputStream(docOutput.getRaf().getChannel().position(n)); //2014-11-26 ? working ? ? ? GC ?? OOM ? ?. // Stream . InflaterInputStream decompressInputStream = null; inflaterOutput.reset(); int count = -1; try { BoundedInputStream boundedInputStream = new BoundedInputStream(docInput, len); boundedInputStream.setPropagateClose(false);// docInput . decompressInputStream = new InflaterInputStream(boundedInputStream, new Inflater(), 512); while ((count = decompressInputStream.read(workingBuffer)) != -1) { inflaterOutput.write(workingBuffer, 0, count); } } finally { decompressInputStream.close(); } } finally { docOutput.seek(prevDocPos); } BytesRef bytesRef = inflaterOutput.getBytesRef(); DataInput bai = new BytesDataInput(bytesRef.bytes, 0, bytesRef.length); Document document = new Document(fields.size()); for (int i = 0; i < fields.size(); i++) { FieldSetting fs = fields.get(i); Field f = null; boolean hasValue = bai.readBoolean(); if (hasValue) { f = fs.createEmptyField(); f.readRawFrom(bai); } else { f = fs.createEmptyField(); } if (f != null) { String multiValueDelimiter = fs.getMultiValueDelimiter(); try { f.parseIndexable(multiValueDelimiter); } catch (FieldDataParseException e) { throw new IOException(e); } } document.add(f); } document.setDocId(docNo); return document; } public int totalCount() { return totalCount; } public void close() throws IOException { logger.debug("DocumentWriter close() count={}", count); // write header if (count > 0) { docOutput.seek(0); docOutput.writeInt(totalCount); } docOutput.close(); positionOutput.close(); } @Override public void getIndexWriteInfo(IndexWriteInfoList writeInfoList) { writeInfoList.setDocumentSize(totalCount); writeInfoList.add(docOutput.getWriteInfo()); writeInfoList.add(positionOutput.getWriteInfo()); } }