// Java tutorial
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.sensei.indexing.hadoop.reduce;

import java.io.File;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.Trash;
import org.apache.log4j.Logger;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import com.sensei.indexing.hadoop.keyvalueformat.IntermediateForm;
import com.sensei.indexing.hadoop.keyvalueformat.Shard;
import com.sensei.indexing.hadoop.util.LuceneIndexFileNameFilter;
import com.sensei.indexing.hadoop.util.SenseiJobConfig;

/**
 * Builds the Lucene index of a single shard.
 *
 * <p>The index is written to a temp dir on the local file system. When the
 * writer is closed, the index files found in the temp dir are copied into the
 * shard's permanent ("perm") directory on the target file system.
 *
 * <p>NOTE(review): this class does not delete the temp dir after copying;
 * if cleanup is required it must happen in the caller.
 */
public class ShardWriter {

  private static final Logger logger = Logger.getLogger(ShardWriter.class);

  /** File system that holds the permanent shard directory. */
  private final FileSystem fs;
  /** Local file system that holds the temp dir. */
  private final FileSystem localFs;
  /** Permanent directory of the shard on {@link #fs}. */
  private final Path perm;
  /** Temp dir on {@link #localFs} the index is written to. */
  private final Path temp;

  private final IndexWriter writer;
  /** Upper bound of segments to optimize down to in close(); values <= 0 disable it. */
  private int maxNumSegments;
  /** Number of intermediate forms merged so far (reported in the log on close). */
  private long numForms = 0;
  private final Configuration iconf;

  /**
   * Prepares the permanent and temp directories and opens a Lucene
   * {@link IndexWriter} on the temp dir.
   *
   * <p>An existing perm directory is moved to the trash and re-created empty;
   * a missing one is created.
   *
   * @param fs the file system holding the shard's permanent directory
   * @param shard the shard to write; supplies the perm directory and generation
   * @param tempDir path of the local temp dir the index is written to
   * @param iconf job configuration (local FS lookup, trash, writer parameters)
   * @throws IOException if a file system operation or opening the writer fails
   */
  public ShardWriter(FileSystem fs, Shard shard, String tempDir, Configuration iconf)
      throws IOException {
    logger.info("Construct a shard writer");

    this.iconf = iconf;
    this.fs = fs;
    localFs = FileSystem.getLocal(iconf);
    perm = new Path(shard.getDirectory());
    temp = new Path(tempDir);

    long initGeneration = shard.getGeneration();

    // Clear leftovers from an earlier run. The File must be built from the
    // full tempDir: Path.getName() would yield only the last path component
    // and therefore resolve against the current working directory.
    File tempFile = new File(tempDir);
    if (localFs.exists(temp)) {
      if (tempFile.exists()) {
        SenseiReducer.deleteDir(tempFile);
      }
    }

    if (!fs.exists(perm)) {
      assert (initGeneration < 0);
      fs.mkdirs(perm);
    } else {
      // Start from an empty perm directory; the old content goes to the trash.
      moveToTrash(iconf, perm);
      fs.mkdirs(perm);
    }

    // analyzer is null because we only use addIndexes, not addDocument
    writer = new IndexWriter(FSDirectory.open(tempFile), null,
        new KeepOnlyLastCommitDeletionPolicy(), MaxFieldLength.UNLIMITED);
    setParameters(iconf);
  }

  /**
   * Merges the index held by an intermediate form into this shard's index.
   *
   * <p>Only the form's directory is added to the writer; no deletes are
   * applied here.
   *
   * @param form the intermediate form whose directory is added to the index
   * @throws IOException if adding the index fails
   */
  public void process(IntermediateForm form) throws IOException {
    writer.addIndexesNoOptimize(new Directory[] { form.getDirectory() });
    numForms++;
  }

  /**
   * Closes the shard writer. Optimizes the index down to the configured
   * maximum number of segments if one is set, closes the Lucene writer, and
   * then copies the files created in the temp directory to the permanent
   * directory.
   *
   * @throws IOException if optimizing, closing the writer, or listing the
   *     temp directory fails
   */
  public void close() throws IOException {
    logger.info("Closing the shard writer, processed " + numForms + " forms");
    try {
      try {
        if (maxNumSegments > 0) {
          writer.optimize(maxNumSegments);
          logger.info("Optimized the shard into at most " + maxNumSegments + " segments");
        }
      } finally {
        // The writer is closed even when optimize() throws.
        writer.close();
        logger.info("Closed Lucene index writer");
      }

      moveFromTempToPerm();
      logger.info("Moved new index files to " + perm);
    } finally {
      logger.info("Closed the shard writer");
    }
  }

  /* (non-Javadoc)
   * @see java.lang.Object#toString()
   */
  @Override
  public String toString() {
    return this.getClass().getName() + "@" + perm + "&" + temp;
  }

  /**
   * Applies the writer-related job settings (max field length, compound file
   * usage, max number of segments) and logs the effective values.
   *
   * @param conf configuration to read the settings from
   */
  private void setParameters(Configuration conf) {
    int maxFieldLength = conf.getInt(SenseiJobConfig.MAX_FIELD_LENGTH, -1);
    if (maxFieldLength > 0) {
      writer.setMaxFieldLength(maxFieldLength);
    }
    writer.setUseCompoundFile(conf.getBoolean(SenseiJobConfig.USE_COMPOUND_FILE, false));
    maxNumSegments = conf.getInt(SenseiJobConfig.MAX_NUM_SEGMENTS, -1);

    if (maxFieldLength > 0) {
      logger.info(SenseiJobConfig.MAX_FIELD_LENGTH + " = " + writer.getMaxFieldLength());
    }
    logger.info(SenseiJobConfig.USE_COMPOUND_FILE + " = " + writer.getUseCompoundFile());
    logger.info(SenseiJobConfig.MAX_NUM_SEGMENTS + " = " + maxNumSegments);
  }

  /**
   * Copies the files in the temp dir that pass
   * {@link LuceneIndexFileNameFilter} into the permanent directory.
   *
   * <p>Copying is best-effort per file: a failure is logged and the remaining
   * files are still attempted.
   *
   * @throws IOException if the temp directory cannot be listed
   */
  private void moveFromTempToPerm() throws IOException {
    FileStatus[] fileStatus = localFs.listStatus(temp, LuceneIndexFileNameFilter.getFilter());

    for (FileStatus status : fileStatus) {
      Path path = status.getPath();
      Path target = new Path(perm, path.getName());

      try {
        if (fs.exists(target)) {
          // NOTE(review): this trashes the whole perm directory, including
          // any files copied earlier in this loop - confirm that is intended.
          moveToTrash(iconf, perm);
        }
        fs.copyFromLocalFile(path, target);
      } catch (Exception e) {
        // Previously swallowed silently; keep going but leave a trace so an
        // incomplete index in the perm directory can be diagnosed.
        logger.error("Failed to copy " + path + " to " + target, e);
      }
    }
  }

  /**
   * Fully optimizes the index. Failures are logged and not propagated.
   */
  public void optimize() {
    try {
      writer.optimize();
    } catch (CorruptIndexException e) {
      logger.error("Corrupt Index error. ", e);
    } catch (IOException e) {
      logger.error("IOException during index optimization. ", e);
    }
  }

  /**
   * Moves a path to the Hadoop trash, then calls {@link Trash#expunge()}.
   * Logs an error if the trash reports that the path was not moved.
   *
   * @param conf configuration used to set up the trash
   * @param path the path to move to the trash
   * @throws IOException if the trash operation fails
   */
  public static void moveToTrash(Configuration conf, Path path) throws IOException {
    Trash t = new Trash(conf);
    boolean isMoved = t.moveToTrash(path);
    t.expunge();
    if (!isMoved) {
      logger.error("Trash is not enabled or file is already in the trash.");
    }
  }
}