// Java tutorial example: fastcatsearch DocumentReader
/* * Copyright 2013 Websquared, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fastcatsearch.ir.document; import java.io.File; import java.io.IOException; import java.util.List; import java.util.zip.Inflater; import java.util.zip.InflaterInputStream; import org.apache.commons.io.input.BoundedInputStream; import org.apache.lucene.util.BytesRef; import org.fastcatsearch.ir.common.IndexFileNames; import org.fastcatsearch.ir.field.Field; import org.fastcatsearch.ir.field.FieldDataParseException; import org.fastcatsearch.ir.io.BufferedFileInput; import org.fastcatsearch.ir.io.ByteRefArrayOutputStream; import org.fastcatsearch.ir.io.BytesDataInput; import org.fastcatsearch.ir.io.DataInput; import org.fastcatsearch.ir.io.IOUtil; import org.fastcatsearch.ir.io.IndexInput; import org.fastcatsearch.ir.settings.FieldSetting; import org.fastcatsearch.ir.settings.SchemaSetting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * 0 read. baseNo ? . 
* */ public class DocumentReader implements Cloneable { private static Logger logger = LoggerFactory.getLogger(DocumentReader.class); private static final int INFLATE_BUFFER_INIT_SIZE = 20 * 1024; private List<FieldSetting> fields; private IndexInput docInput; private IndexInput positionInput; private ByteRefArrayOutputStream inflaterOutput; private byte[] workingBuffer; private int baseDocNo; private int documentCount; private int lastDocNo = -1; private DataInput lastBai; private long positionLimit; public DocumentReader() { } public DocumentReader(SchemaSetting schemaSetting, File dir) throws IOException { this(schemaSetting, dir, 0); } public DocumentReader(SchemaSetting schemaSetting, File dir, int baseDocNo) throws IOException { this.baseDocNo = baseDocNo; fields = schemaSetting.getFieldSettingList(); docInput = new BufferedFileInput(dir, IndexFileNames.docStored); positionInput = new BufferedFileInput(dir, IndexFileNames.docPosition); positionLimit = positionInput.length(); documentCount = docInput.readInt(); logger.info("DocumentCount = {}", documentCount); inflaterOutput = new ByteRefArrayOutputStream(INFLATE_BUFFER_INIT_SIZE); // ?? ??. 20KB . ? 3MB?. workingBuffer = new byte[1024]; } public int getDocumentCount() { return documentCount; } public int getBaseNumber() { return baseDocNo; } // . public Document readDocument(int docNo) throws IOException { return readDocument(docNo, null); } public Document readIndexableDocument(int docNo) throws IOException { return readDocument(docNo, null, true); } public Document readDocument(int docNo, boolean[] fieldSelectOption) throws IOException { return readDocument(docNo, fieldSelectOption, false); } public Document readDocument(int docNo, boolean[] fieldSelectOption, boolean indexable) throws IOException { // if(docNo < baseDocNo) throw new // IOException("Request docNo cannot less than baseDocNo! docNo = "+docNo+", baseDocNo = "+baseDocNo); // baseDocNo? . 
// docNo -= baseDocNo; DataInput bai = null; if (docNo != lastDocNo) { long positionOffset = docNo * IOUtil.SIZE_OF_LONG; if (positionOffset >= positionLimit) { //. return null; } positionInput.seek(positionOffset); long pos = positionInput.readLong(); // find a document block docInput.seek(pos); int len = docInput.readInt(); //2014-11-26 ? working ? ? ? GC ?? OOM ? ?. // Stream . InflaterInputStream decompressInputStream = null; inflaterOutput.reset(); int count = -1; try { BoundedInputStream boundedInputStream = new BoundedInputStream(docInput, len); boundedInputStream.setPropagateClose(false);// docInput . decompressInputStream = new InflaterInputStream(boundedInputStream, new Inflater(), 512); while ((count = decompressInputStream.read(workingBuffer)) != -1) { inflaterOutput.write(workingBuffer, 0, count); } } finally { decompressInputStream.close(); } BytesRef bytesRef = inflaterOutput.getBytesRef(); bai = new BytesDataInput(bytesRef.bytes, 0, bytesRef.length); lastDocNo = docNo; lastBai = bai; } else { lastBai.reset(); bai = lastBai; } Document document = new Document(fields.size()); for (int i = 0; i < fields.size(); i++) { FieldSetting fs = fields.get(i); Field f = null; boolean hasValue = bai.readBoolean(); // logger.debug("read hasValue={}, select={}, fs={} ", hasValue, fieldSelectOption, fs); if (hasValue) { //1. fieldSelectOption ? ? ??. //2. ? , true? ? ?. if (fieldSelectOption == null || (fieldSelectOption != null && fieldSelectOption[i])) { f = fs.createEmptyField(); f.readRawFrom(bai); } else { bai.skipVIntData(); } // logger.debug("fill {} >> {}", i, f); } else { //? ? . 
f = fs.createEmptyField(); // logger.debug("fill {} >> empty", i); } if (f != null && indexable) { String multiValueDelimiter = fs.getMultiValueDelimiter(); try { f.parseIndexable(multiValueDelimiter); } catch (FieldDataParseException e) { throw new IOException(e); } } document.set(i, f); } document.setDocId(docNo + baseDocNo); return document; } @Override public DocumentReader clone() { DocumentReader reader = new DocumentReader(); reader.fields = fields; reader.docInput = docInput.clone(); reader.positionInput = positionInput.clone(); reader.baseDocNo = baseDocNo; reader.documentCount = documentCount; reader.inflaterOutput = new ByteRefArrayOutputStream(INFLATE_BUFFER_INIT_SIZE); // ?? ??. reader.workingBuffer = new byte[1024]; reader.positionLimit = positionLimit; return reader; } public void close() throws IOException { docInput.close(); positionInput.close(); } }