Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.hyracks.storage.am.lsm.common.impls; import java.io.File; import java.io.FilenameFilter; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Paths; import java.text.Format; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.Date; import java.util.HashSet; import java.util.List; import org.apache.commons.io.FileUtils; import org.apache.hyracks.api.exceptions.ErrorCode; import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.api.io.FileReference; import org.apache.hyracks.api.io.IIOManager; import org.apache.hyracks.storage.am.common.api.ITreeIndex; import org.apache.hyracks.storage.am.common.api.ITreeIndexFrame; import org.apache.hyracks.storage.am.common.api.ITreeIndexMetadataFrame; import org.apache.hyracks.storage.am.lsm.common.api.ILSMIndexFileManager; import org.apache.hyracks.storage.common.buffercache.IBufferCache; import org.apache.hyracks.storage.common.buffercache.ICachedPage; import org.apache.hyracks.storage.common.file.BufferedFileHandle; import org.apache.hyracks.storage.common.file.IFileMapProvider; public abstract class AbstractLSMIndexFileManager implements ILSMIndexFileManager { public static final String SPLIT_STRING = "_"; protected static final String BLOOM_FILTER_STRING = "f"; protected static final String TRANSACTION_PREFIX = ".T"; public enum TreeIndexState { INVALID, VERSION_MISMATCH, VALID } protected final IIOManager ioManager; protected final IFileMapProvider fileMapProvider; // baseDir should reflect dataset name and partition name and be absolute protected String baseDir; protected final Format formatter = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss-SSS"); protected final Comparator<String> cmp = new FileNameComparator(); protected final Comparator<ComparableFileName> recencyCmp = new RecencyComparator(); protected final TreeIndexFactory<? extends ITreeIndex> treeFactory; private String prevTimestamp = null; public AbstractLSMIndexFileManager(IIOManager ioManager, IFileMapProvider fileMapProvider, FileReference file, TreeIndexFactory<? extends ITreeIndex> treeFactory) { this.ioManager = ioManager; this.baseDir = file.getFile().getAbsolutePath(); if (!baseDir.endsWith(System.getProperty("file.separator"))) { baseDir += System.getProperty("file.separator"); } this.fileMapProvider = fileMapProvider; this.treeFactory = treeFactory; } private static FilenameFilter fileNameFilter = new FilenameFilter() { @Override public boolean accept(File dir, String name) { return !name.startsWith("."); } }; protected TreeIndexState isValidTreeIndex(ITreeIndex treeIndex) throws HyracksDataException { IBufferCache bufferCache = treeIndex.getBufferCache(); treeIndex.activate(); try { int metadataPage = treeIndex.getPageManager().getMetadataPageId(); if (metadataPage < 0) { return TreeIndexState.INVALID; } ITreeIndexMetadataFrame metadataFrame = treeIndex.getPageManager().createMetadataFrame(); ICachedPage page = bufferCache .pin(BufferedFileHandle.getDiskPageId(treeIndex.getFileId(), metadataPage), false); page.acquireReadLatch(); try { metadataFrame.setPage(page); if (!metadataFrame.isValid()) { return TreeIndexState.INVALID; } else if (metadataFrame.getVersion() != ITreeIndexFrame.Constants.VERSION) { return TreeIndexState.VERSION_MISMATCH; } else { return TreeIndexState.VALID; } } finally { page.releaseReadLatch(); bufferCache.unpin(page); } } finally { treeIndex.deactivate(); } } protected void cleanupAndGetValidFilesInternal(FilenameFilter filter, TreeIndexFactory<? extends ITreeIndex> treeFactory, ArrayList<ComparableFileName> allFiles) throws HyracksDataException { String[] files = listDirFiles(baseDir, filter); File dir = new File(baseDir); for (String fileName : files) { FileReference fileRef = ioManager.resolveAbsolutePath(dir.getPath() + File.separator + fileName); if (treeFactory == null) { allFiles.add(new ComparableFileName(fileRef)); continue; } TreeIndexState idxState = isValidTreeIndex(treeFactory.createIndexInstance(fileRef)); if (idxState == TreeIndexState.VALID) { allFiles.add(new ComparableFileName(fileRef)); } else if (idxState == TreeIndexState.INVALID) { fileRef.delete(); } } } static String[] listDirFiles(String path, FilenameFilter filter) throws HyracksDataException { File dir = new File(path); /* * Returns null if this abstract pathname does not denote a directory, or if an I/O error occurs. */ String[] files = dir.list(filter); if (files == null) { if (!dir.canRead()) { throw HyracksDataException.create(ErrorCode.CANNOT_READ_FILE, path); } else if (!dir.exists()) { throw HyracksDataException.create(ErrorCode.FILE_DOES_NOT_EXISTS, path); } else if (!dir.isDirectory()) { throw HyracksDataException.create(ErrorCode.FILE_IS_NOT_DIRECTORY, path); } throw HyracksDataException.create(ErrorCode.UNIDENTIFIED_IO_ERROR_READING_FILE, path); } return files; } protected void validateFiles(HashSet<String> groundTruth, ArrayList<ComparableFileName> validFiles, FilenameFilter filter, TreeIndexFactory<? extends ITreeIndex> treeFactory) throws HyracksDataException { ArrayList<ComparableFileName> tmpAllInvListsFiles = new ArrayList<>(); cleanupAndGetValidFilesInternal(filter, treeFactory, tmpAllInvListsFiles); for (ComparableFileName cmpFileName : tmpAllInvListsFiles) { int index = cmpFileName.fileName.lastIndexOf(SPLIT_STRING); String file = cmpFileName.fileName.substring(0, index); if (groundTruth.contains(file)) { validFiles.add(cmpFileName); } else { File invalidFile = new File(cmpFileName.fullPath); invalidFile.delete(); } } } @Override public void createDirs() { File f = new File(baseDir); f.mkdirs(); } @Override public void deleteDirs() throws HyracksDataException { File f = new File(baseDir); if (f.exists()) { delete(f); } } private void delete(File f) throws HyracksDataException { if (!FileUtils.deleteQuietly(f)) { throw HyracksDataException.create(ErrorCode.UNIDENTIFIED_IO_ERROR_DELETING_DIR, f.getPath()); } } protected static FilenameFilter bloomFilterFilter = new FilenameFilter() { @Override public boolean accept(File dir, String name) { return !name.startsWith(".") && name.endsWith(BLOOM_FILTER_STRING); } }; protected FileReference createFlushFile(String flushFileName) throws HyracksDataException { return ioManager.resolveAbsolutePath(flushFileName); } protected FileReference createMergeFile(String mergeFileName) throws HyracksDataException { return createFlushFile(mergeFileName); } @Override public LSMComponentFileReferences getRelFlushFileReference() throws HyracksDataException { String ts = getCurrentTimestamp(); // Begin timestamp and end timestamp are identical since it is a flush return new LSMComponentFileReferences(createFlushFile(baseDir + ts + SPLIT_STRING + ts), null, null); } @Override public LSMComponentFileReferences getRelMergeFileReference(String firstFileName, String lastFileName) throws HyracksDataException { String[] firstTimestampRange = firstFileName.split(SPLIT_STRING); String[] lastTimestampRange = lastFileName.split(SPLIT_STRING); // Get the range of timestamps by taking the earliest and the latest timestamps return new LSMComponentFileReferences( createMergeFile(baseDir + firstTimestampRange[0] + SPLIT_STRING + lastTimestampRange[1]), null, null); } @Override public List<LSMComponentFileReferences> cleanupAndGetValidFiles() throws HyracksDataException { List<LSMComponentFileReferences> validFiles = new ArrayList<>(); ArrayList<ComparableFileName> allFiles = new ArrayList<>(); // Gather files and delete invalid files // There are two types of invalid files: // (1) The isValid flag is not set // (2) The file's interval is contained by some other file // Here, we only filter out (1). cleanupAndGetValidFilesInternal(fileNameFilter, treeFactory, allFiles); if (allFiles.isEmpty()) { return validFiles; } if (allFiles.size() == 1) { validFiles.add(new LSMComponentFileReferences(allFiles.get(0).fileRef, null, null)); return validFiles; } // Sorts files names from earliest to latest timestamp. Collections.sort(allFiles); List<ComparableFileName> validComparableFiles = new ArrayList<>(); ComparableFileName last = allFiles.get(0); validComparableFiles.add(last); for (int i = 1; i < allFiles.size(); i++) { ComparableFileName current = allFiles.get(i); // The current start timestamp is greater than last stop timestamp so current is valid. if (current.interval[0].compareTo(last.interval[1]) > 0) { validComparableFiles.add(current); last = current; } else if (current.interval[0].compareTo(last.interval[0]) >= 0 && current.interval[1].compareTo(last.interval[1]) <= 0) { // The current file is completely contained in the interval of the // last file. Thus the last file must contain at least as much information // as the current file, so delete the current file. current.fileRef.delete(); } else { // This scenario should not be possible since timestamps are monotonically increasing. throw new HyracksDataException("Found LSM files with overlapping timestamp intervals, " + "but the intervals were not contained by another file."); } } // Sort valid files in reverse lexicographical order, such that newer files come first. Collections.sort(validComparableFiles, recencyCmp); for (ComparableFileName cmpFileName : validComparableFiles) { validFiles.add(new LSMComponentFileReferences(cmpFileName.fileRef, null, null)); } return validFiles; } @Override public Comparator<String> getFileNameComparator() { return cmp; } /** * Sorts strings in reverse lexicographical order. The way we construct the * file names above guarantees that: * 1. Flushed files sort lower than merged files * 2. Flushed files are sorted from newest to oldest (based on the timestamp * string) */ private class FileNameComparator implements Comparator<String> { @Override public int compare(String a, String b) { // Consciously ignoring locale. return -a.compareTo(b); } } @Override public String getBaseDir() { return baseDir; } @Override public void recoverTransaction() throws HyracksDataException { String[] files = listDirFiles(baseDir, transactionFileNameFilter); File dir = new File(baseDir); try { if (files.length == 0) { // Do nothing } else if (files.length > 1) { throw new HyracksDataException("Found more than one transaction"); } else { Files.delete(Paths.get(dir.getPath() + File.separator + files[0])); } } catch (IOException e) { throw new HyracksDataException("Failed to recover transaction", e); } } protected class ComparableFileName implements Comparable<ComparableFileName> { public final FileReference fileRef; public final String fullPath; public final String fileName; // Timestamp interval. public final String[] interval; public ComparableFileName(FileReference fileRef) { this.fileRef = fileRef; this.fullPath = fileRef.getFile().getAbsolutePath(); this.fileName = fileRef.getFile().getName(); interval = fileName.split(SPLIT_STRING); } @Override public int compareTo(ComparableFileName b) { int startCmp = interval[0].compareTo(b.interval[0]); if (startCmp != 0) { return startCmp; } return b.interval[1].compareTo(interval[1]); } } private class RecencyComparator implements Comparator<ComparableFileName> { @Override public int compare(ComparableFileName a, ComparableFileName b) { int cmp = -a.interval[0].compareTo(b.interval[0]); if (cmp != 0) { return cmp; } return -a.interval[1].compareTo(b.interval[1]); } } // This function is used to delete transaction files for aborted transactions @Override public void deleteTransactionFiles() throws HyracksDataException { String[] files = listDirFiles(baseDir, transactionFileNameFilter); if (files.length == 0) { // Do nothing } else if (files.length > 1) { throw new HyracksDataException("Found more than one transaction"); } else { File dir = new File(baseDir); //create transaction filter FilenameFilter transactionFilter = createTransactionFilter(files[0], true); String[] componentsFiles = listDirFiles(baseDir, transactionFilter); for (String fileName : componentsFiles) { try { String absFileName = dir.getPath() + File.separator + fileName; Files.delete(Paths.get(absFileName)); } catch (IOException e) { throw new HyracksDataException("Failed to delete transaction files", e); } } // delete the txn lock file String absFileName = dir.getPath() + File.separator + files[0]; try { Files.delete(Paths.get(absFileName)); } catch (IOException e) { throw new HyracksDataException("Failed to delete transaction files", e); } } } @Override public LSMComponentFileReferences getNewTransactionFileReference() throws IOException { return null; } @Override public LSMComponentFileReferences getTransactionFileReferenceForCommit() throws HyracksDataException { return null; } protected static FilenameFilter transactionFileNameFilter = new FilenameFilter() { @Override public boolean accept(File dir, String name) { return name.startsWith(".T"); } }; protected static FilenameFilter dummyFilter = new FilenameFilter() { @Override public boolean accept(File dir, String name) { return true; } }; protected static FilenameFilter createTransactionFilter(String transactionFileName, final boolean inclusive) { final String timeStamp = transactionFileName .substring(transactionFileName.indexOf(TRANSACTION_PREFIX) + TRANSACTION_PREFIX.length()); return new FilenameFilter() { @Override public boolean accept(File dir, String name) { if (inclusive) { return name.startsWith(timeStamp); } else { return !name.startsWith(timeStamp); } } }; } protected FilenameFilter getTransactionFileFilter(boolean inclusive) throws HyracksDataException { String[] files = listDirFiles(baseDir, transactionFileNameFilter); if (files.length == 0) { return dummyFilter; } else { return createTransactionFilter(files[0], inclusive); } } protected FilenameFilter getCompoundFilter(final FilenameFilter filter1, final FilenameFilter filter2) { return new FilenameFilter() { @Override public boolean accept(File dir, String name) { return (filter1.accept(dir, name) && filter2.accept(dir, name)); } }; } /** * @return The string format of the current timestamp. * The returned results of this method are guaranteed to not have duplicates. */ protected String getCurrentTimestamp() { Date date = new Date(); String ts = formatter.format(date); /** * prevent a corner case where the same timestamp can be given. */ while (prevTimestamp != null && ts.compareTo(prevTimestamp) == 0) { try { Thread.sleep(1); date = new Date(); ts = formatter.format(date); } catch (InterruptedException e) { //ignore } } prevTimestamp = ts; return ts; } }