Java tutorial
/**
 * Copyright (C) 2014 Pengfei Liu <pfliu@se.cuhk.edu.hk>
 * The Chinese University of Hong Kong.
 *
 * This file is part of smart-search-web.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.cuhk.hccl;

import java.io.File;
import java.io.IOException;
import java.util.Collection;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

/**
 * Builds a Lucene index, held in a shared static {@link RAMDirectory}, from
 * the files found under a data folder.
 *
 * <p>Each file becomes one document with two fields: {@link #PATH_FIELD}
 * (the file path, stored as a single un-tokenized term) and
 * {@link #CONTENT_FIELD} (the file text, using {@link #TERM_STORED}).
 */
public class Indexer {

	/** Name of the field holding the text content of an indexed file. */
	public final static String CONTENT_FIELD = "content";

	/** Name of the field holding the path of an indexed file. */
	public final static String PATH_FIELD = "path";

	/**
	 * Field type used for {@link #CONTENT_FIELD}: indexed, tokenized, stored,
	 * with term vectors and term vector positions. Frozen in the static
	 * initializer below, so it cannot be modified afterwards.
	 */
	public static final FieldType TERM_STORED = new FieldType();

	/** Analyzer handed to the {@link IndexWriterConfig} in {@link #createIndex(String)}. */
	public static StandardAnalyzer analyzer = new StandardAnalyzer();

	/** Directory that {@link #createIndex(String)} writes the index into. */
	public static Directory index = new RAMDirectory();

	static {
		TERM_STORED.setIndexed(true);
		TERM_STORED.setTokenized(true);
		TERM_STORED.setStored(true);
		TERM_STORED.setStoreTermVectors(true);
		TERM_STORED.setStoreTermVectorPositions(true);
		TERM_STORED.freeze();
	}

	/**
	 * Create index of RAMDirectory from a data folder with text files.
	 *
	 * <p>The folder is walked recursively and every file found is added as one
	 * document. Progress is reported on standard output.
	 *
	 * @param dataSet path of the folder whose files are to be indexed
	 * @throws IOException if the index writer cannot be opened, a file cannot
	 *         be read, or a document cannot be written
	 */
	public static void createIndex(String dataSet) throws IOException {
		// List the input first so that a failure while enumerating files
		// happens before any writer has been opened on the index.
		Collection<File> files = FileUtils.listFiles(new File(dataSet), null, true);

		IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer);

		// try-with-resources guarantees the writer is closed even when reading
		// or adding a document throws; otherwise the writer would stay open on
		// the shared static directory after a failed run.
		try (IndexWriter writer = new IndexWriter(index, config)) {
			for (File file : files) {
				String path = file.getPath();
				// NOTE(review): this overload takes no charset argument, so the
				// decoding is whatever Commons IO defaults to (presumably the
				// platform charset) - confirm against the data set's encoding.
				String content = FileUtils.readFileToString(file);

				Document doc = new Document();
				doc.add(new StringField(PATH_FIELD, path, Field.Store.YES));
				doc.add(new Field(CONTENT_FIELD, content, TERM_STORED));

				writer.addDocument(doc);
				System.out.println("[INFO] Indexing file: " + path);
			}
		}

		System.out.println("\n[INFO]" + files.size() + " files has been indexed.");
	}
}