com.cloudera.recordbreaker.analyzer.FSAnalyzer.java Source code

Java tutorial

Introduction

Here is the source code for com.cloudera.recordbreaker.analyzer.FSAnalyzer.java

Source

/*
 * Copyright (c) 2012, Cloudera, Inc. All Rights Reserved.
 *
 * Cloudera, Inc. licenses this file to you under the Apache License,
 * Version 2.0 (the "License"). You may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied. See the License for
 * the specific language governing permissions and limitations under the
 * License.
 */
package com.cloudera.recordbreaker.analyzer;

import java.net.URI;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.Map;
import java.util.TreeMap;
import java.util.List;
import java.util.Date;
import java.util.Random;
import java.util.ArrayList;
import java.net.URISyntaxException;

import org.apache.hadoop.hive.cli.*;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.conf.Configuration;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.almworks.sqlite4java.SQLite;
import com.almworks.sqlite4java.SQLiteJob;
import com.almworks.sqlite4java.SQLiteQueue;
import com.almworks.sqlite4java.SQLiteStatement;
import com.almworks.sqlite4java.SQLiteException;
import com.almworks.sqlite4java.SQLiteConnection;

/***************************************************************
 * <code>FSAnalyzer</code> crawls a filesystem and figures out
 * its schema contents. We place the results of that analysis into
 * a store for future analytics
 *
 * @author "Michael Cafarella" <mjc@cloudera.com>
 ***************************************************************/
public class FSAnalyzer {
    private static final Log LOG = LogFactory.getLog(FSAnalyzer.class);
    static FSAnalyzer fsaInstance;

    public static FSAnalyzer getInstance() {
        return fsaInstance;
    }

    ////////////////////////////////////////
    // All the SQL statements we need
    ////////////////////////////////////////
    static Random r = new Random();
    static SimpleDateFormat fileDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    //
    // 1. Create the schemas
    //
    static String CREATE_TABLE_CONFIG = "CREATE TABLE Configs(propertyname varchar(128), property varchar(256));";
    static String CREATE_TABLE_CRAWL = "CREATE TABLE Crawls(crawlid integer primary key autoincrement, crawlstarted date, crawlfinished date, inprogress text, fsid integer, foreign key(fsid) references Filesystems(fsid));";
    static String CREATE_TABLE_FILESYSTEM = "CREATE TABLE Filesystems(fsid integer primary key autoincrement, fsname text);";
    static String CREATE_TABLE_FILES = "CREATE TABLE Files(fid integer primary key autoincrement, isDir string, crawlid integer, fname varchar(256), owner varchar(16), groupowner varchar(16), permissions varchar(32), size integer, modified date, path varchar(256), foreign key(crawlid) references Crawls(crawlid));";
    static String CREATE_TABLE_TYPES = "CREATE TABLE Types(typeid integer primary key autoincrement, typelabel varchar(64));";
    static String CREATE_TABLE_TYPE_GUESSES = "CREATE TABLE TypeGuesses(fid integer, typeid integer, foreign key(fid) references Files(fid), foreign key(typeid) references Types(typeid));";
    static String CREATE_TABLE_SCHEMAS = "CREATE TABLE Schemas(schemaid integer primary key autoincrement, schemarepr varchar(1024), schemasrcdescription varchar(32), schemapayload blob);";
    static String CREATE_TABLE_GUESSES = "CREATE TABLE SchemaGuesses(fid integer, schemaid integer, foreign key(fid) references Files(fid), foreign key(schemaid) references Schemas(schemaid));";
    static String CREATE_TABLE_HIVESUPPORT = "CREATE TABLE HiveTables(fpath varchar(256), hiveTableName varchar(128));";

    void createTables() throws SQLiteException {
        dbQueue.execute(new SQLiteJob<Object>() {
            protected Object job(SQLiteConnection db) throws SQLiteException {
                try {
                    db.exec(CREATE_TABLE_CONFIG);
                    db.exec(CREATE_TABLE_FILESYSTEM);
                    db.exec(CREATE_TABLE_CRAWL);
                    db.exec(CREATE_TABLE_FILES);
                    db.exec(CREATE_TABLE_TYPES);
                    db.exec(CREATE_TABLE_TYPE_GUESSES);
                    db.exec(CREATE_TABLE_SCHEMAS);
                    db.exec(CREATE_TABLE_GUESSES);
                    db.exec(CREATE_TABLE_HIVESUPPORT);
                } finally {
                }
                return null;
            }
        }).complete();
    }

    ///////////////////////////////////////////////
    // Manage Hive Support
    ///////////////////////////////////////////////
    public String checkHiveSupport(final Path fpath) {
        return dbQueue.execute(new SQLiteJob<String>() {
            protected String job(SQLiteConnection db) throws SQLiteException {
                SQLiteStatement stmt = db.prepare("SELECT hiveTableName FROM HiveTables WHERE fpath = ?");
                try {
                    stmt.bind(1, fpath.toString());
                    while (stmt.step()) {
                        return stmt.columnString(0);
                    }
                    return null;
                } finally {
                    stmt.dispose();
                }
            }
        }).complete();
    }

    public void addHiveSupport(final Path fpath, final String tablename) {
        dbQueue.execute(new SQLiteJob<Object>() {
            protected Object job(SQLiteConnection db) throws SQLiteException {
                SQLiteStatement stmt = db.prepare("INSERT into HiveTables VALUES(?, ?)");
                try {
                    stmt.bind(1, fpath.toString());
                    stmt.bind(2, tablename);
                    stmt.step();
                    return null;
                } finally {
                    stmt.dispose();
                }
            }
        }).complete();
    }

    ///////////////////////////////////////////////
    // Manage Crawls and Filesystems
    ///////////////////////////////////////////////
    public long getCreateFilesystem(final URI fsuri, boolean canCreate) {
        // REMIND -- must check to make sure FS is valid before accepting it.
        // (E.g., for HDFS see if we can contact it)

        long fsid = dbQueue.execute(new SQLiteJob<Long>() {
            protected Long job(SQLiteConnection db) throws SQLiteException {
                SQLiteStatement stmt = db.prepare("SELECT fsid FROM Filesystems WHERE fsname = ?");
                try {
                    stmt.bind(1, fsuri.toString());
                    if (stmt.step()) {
                        long resultId = stmt.columnLong(0);
                        return resultId;
                    } else {
                        return -1L;
                    }
                } finally {
                    stmt.dispose();
                }
            }
        }).complete();
        if (fsid >= 0) {
            return fsid;
        }

        // It wasn't there, so create it!
        if (canCreate) {
            return dbQueue.execute(new SQLiteJob<Long>() {
                protected Long job(SQLiteConnection db) throws SQLiteException {
                    SQLiteStatement stmt = db.prepare("INSERT into Filesystems VALUES(null, ?)");
                    try {
                        stmt.bind(1, fsuri.toString());
                        stmt.step();
                        return db.getLastInsertId();
                    } finally {
                        stmt.dispose();
                    }
                }
            }).complete();
        } else {
            return -1L;
        }
    };

    public FileSystem getFS() {
        String uriStr = getConfigProperty("fsuri");
        if (uriStr == null) {
            return null;
        }
        try {
            URI uri = new URI(uriStr);
            FileSystem result = FileSystem.get(uri, new Configuration());
            return result;
        } catch (IOException iex) {
            LOG.error(iex.toString());
            return null;
        } catch (URISyntaxException use) {
            LOG.error(use.toString());
            return null;
        }
    }

    /**
     * Helper fn <code>getNewOrPendingCrawl</code> returns the id of a Crawl for the specified filesystem.
     * If a crawl is pending, that one is returned.
     * If no crawl is pending, a new one is created.
     */
    public long getCreatePendingCrawl(final long fsid, boolean shouldCreate) {
        long crawlid = dbQueue.execute(new SQLiteJob<Long>() {
            protected Long job(SQLiteConnection db) throws SQLiteException {
                SQLiteStatement stmt = db
                        .prepare("SELECT crawlid from Crawls WHERE fsid = ? AND inprogress = 'True'");
                try {
                    stmt.bind(1, fsid);
                    if (stmt.step()) {
                        return stmt.columnLong(0);
                    } else {
                        return -1L;
                    }
                } finally {
                    stmt.dispose();
                }
            }
        }).complete();

        if (crawlid >= 0) {
            return crawlid;
        }

        // Time to insert
        if (shouldCreate) {
            return dbQueue.execute(new SQLiteJob<Long>() {
                protected Long job(SQLiteConnection db) throws SQLiteException {
                    Date now = new Date(System.currentTimeMillis());
                    String dateCreated = fileDateFormat.format(now);
                    String syntheticDateFinished = fileDateFormat.format(new Date(0));
                    String inprogress = "True";
                    SQLiteStatement stmt = db.prepare("INSERT into Crawls VALUES(null, ?, ?, ?, ?)");
                    try {
                        stmt.bind(1, dateCreated).bind(2, syntheticDateFinished).bind(3, inprogress).bind(4, fsid);
                        stmt.step();
                        return db.getLastInsertId();
                    } finally {
                        stmt.dispose();
                    }
                }
            }).complete();
        }
        return -1L;
    }

    public void completeCrawl(final long crawlid) throws SQLiteException {
        dbQueue.execute(new SQLiteJob<Long>() {
            protected Long job(SQLiteConnection db) throws SQLiteException {
                SQLiteStatement stmt = db
                        .prepare("UPDATE Crawls SET inprogress='False', crawlfinished=? WHERE crawlid = ?");
                try {
                    Date now = new Date(System.currentTimeMillis());
                    String dateFinished = fileDateFormat.format(now);
                    stmt.bind(1, dateFinished).bind(2, crawlid);
                    if (stmt.step()) {
                        return crawlid;
                    } else {
                        return -1L;
                    }
                } finally {
                    stmt.dispose();
                }
            }
        }).complete();
    }

    public long getLatestCompleteCrawl(final long fsid) {
        return dbQueue.execute(new SQLiteJob<Long>() {
            protected Long job(SQLiteConnection db) throws SQLiteException {
                SQLiteStatement stmt = db.prepare(
                        "SELECT crawlid from Crawls WHERE fsid = ? AND inprogress = 'False' ORDER BY crawlid DESC LIMIT 1");
                try {
                    stmt.bind(1, fsid);
                    if (stmt.step()) {
                        return stmt.columnLong(0);
                    } else {
                        return -1L;
                    }
                } finally {
                    stmt.dispose();
                }
            }
        }).complete();
    }

    ///////////////////////////////////////////////
    // Manage Types and Schemas
    ///////////////////////////////////////////////
    /**
     * Helper fn <code>getCreateType</code> returns the id of a specified Type in the Types table.
     * The row is created, if necessary.
     */
    long getCreateType(final String typeLabel) throws SQLiteException {
        long typeid = dbQueue.execute(new SQLiteJob<Long>() {
            protected Long job(SQLiteConnection db) throws SQLiteException {
                SQLiteStatement stmt = db.prepare("SELECT typeid FROM Types WHERE typelabel = ?");
                try {
                    stmt.bind(1, typeLabel);
                    if (stmt.step()) {
                        long resultId = stmt.columnLong(0);
                        return resultId;
                    } else {
                        return -1L;
                    }
                } finally {
                    stmt.dispose();
                }
            }
        }).complete();

        if (typeid >= 0) {
            return typeid;
        }

        // Time to insert
        return dbQueue.execute(new SQLiteJob<Long>() {
            protected Long job(SQLiteConnection db) throws SQLiteException {
                SQLiteStatement stmt = db.prepare("INSERT into Types VALUES(null, ?)");
                try {
                    stmt.bind(1, typeLabel);
                    stmt.step();
                    return db.getLastInsertId();
                } finally {
                    stmt.dispose();
                }
            }
        }).complete();
    }

    /**
     * Helper fn <code>getCreateSchema</code> returns the id of a specified Schema in the Schemas table.
     * The row is created, if necessary.
     */
    long getCreateSchema(SchemaDescriptor sd) throws SQLiteException {
        final String schemaIdentifier = (sd == null) ? "" : sd.getSchemaIdentifier();
        final String schemaDesc = (sd == null) ? "no schema" : sd.getSchemaSourceDescription();
        final byte[] payload = (sd == null) ? new byte[0] : sd.getPayload();
        long schemaid = dbQueue.execute(new SQLiteJob<Long>() {
            protected Long job(SQLiteConnection db) throws SQLiteException {
                final SQLiteStatement stmt = db
                        .prepare("SELECT schemaid FROM Schemas WHERE schemarepr = ? AND schemasrcdescription = ?");
                try {
                    stmt.bind(1, schemaIdentifier).bind(2, schemaDesc);
                    if (stmt.step()) {
                        long resultId = stmt.columnLong(0);
                        return resultId;
                    } else {
                        return -1L;
                    }
                } finally {
                    stmt.dispose();
                }
            }
        }).complete();

        if (schemaid >= 0) {
            return schemaid;
        }

        // Time to insert
        return dbQueue.execute(new SQLiteJob<Long>() {
            protected Long job(SQLiteConnection db) throws SQLiteException {
                final SQLiteStatement stmt = db.prepare("INSERT into Schemas VALUES(null, ?, ?, ?)");
                try {
                    stmt.bind(1, schemaIdentifier).bind(2, schemaDesc).bind(3, payload);
                    stmt.step();
                    return db.getLastInsertId();
                } finally {
                    stmt.dispose();
                }
            }
        }).complete();
    }

    /**
     * Add a single brand-new file to the system.  Parse it, obtain structure, etc, if needed.
     */
    void addSingleFile(FileSystem fs, Path insertFile, long crawlId) throws IOException {
        FileStatus fstatus = fs.getFileStatus(insertFile);
        addFileMetadata(fstatus, crawlId);
        final boolean isDir = fstatus.isDir();

        if (!isDir) {
            final List<Long> typeGuesses = new ArrayList<Long>();
            DataDescriptor descriptor = formatAnalyzer.describeData(fs, insertFile);
            List<SchemaDescriptor> schemas = null;
            try {
                schemas = descriptor.getSchemaDescriptor();

                if (schemas == null || schemas.size() == 0) {
                    typeGuesses.add(getCreateType(descriptor.getFileTypeIdentifier()));
                    typeGuesses.add(getSingleFileSummary(descriptor.getFilename().toString()).getFid());
                    typeGuesses.add(getCreateSchema(null));
                } else {
                    for (SchemaDescriptor sd : schemas) {
                        typeGuesses.add(getCreateType(descriptor.getFileTypeIdentifier()));
                        typeGuesses.add(getSingleFileSummary(descriptor.getFilename().toString()).getFid());
                        typeGuesses.add(getCreateSchema(sd));
                    }
                }
            } catch (Exception ex) {
                ex.printStackTrace();
            }

            dbQueue.execute(new SQLiteJob<Object>() {
                protected Long job(SQLiteConnection db) throws SQLiteException {
                    for (int i = 0; i < typeGuesses.size(); i += 3) {
                        long typeId = typeGuesses.get(i);
                        long fileId = typeGuesses.get(i + 1);
                        long schemaId = typeGuesses.get(i + 2);

                        SQLiteStatement stmt = db.prepare("INSERT into TypeGuesses VALUES(?, ?)");
                        try {
                            stmt.bind(1, fileId).bind(2, typeId);
                            stmt.step();
                        } finally {
                            stmt.dispose();
                        }
                    }
                    return null;
                }
            }).complete();

            dbQueue.execute(new SQLiteJob<Object>() {
                protected Long job(SQLiteConnection db) throws SQLiteException {
                    for (int i = 0; i < typeGuesses.size(); i += 3) {
                        long typeId = typeGuesses.get(i);
                        long fileId = typeGuesses.get(i + 1);
                        long schemaId = typeGuesses.get(i + 2);

                        SQLiteStatement stmt = db.prepare("INSERT into SchemaGuesses VALUES(?, ?)");
                        try {
                            stmt.bind(1, fileId).bind(2, schemaId);
                            stmt.step();
                        } finally {
                            stmt.dispose();
                        }
                    }
                    return null;
                }
            }).complete();
        }
    }

    /**
     * <code>addFileMetadata</code> stores the pathname, size, owner, etc.
     */
    void addFileMetadata(final FileStatus fstatus, final long crawlId) {
        // Compute strings to represent file metadata
        Path insertFile = fstatus.getPath();
        final boolean isDir = fstatus.isDir();
        FsPermission fsp = fstatus.getPermission();
        final String permissions = (isDir ? "d" : "-") + fsp.getUserAction().SYMBOL + fsp.getGroupAction().SYMBOL
                + fsp.getOtherAction().SYMBOL;

        // Compute formal pathname representation
        String fnameString = null;
        String parentPathString = null;
        if (isDir && insertFile.getParent() == null) {
            parentPathString = "";
            fnameString = insertFile.toString();
        } else {
            fnameString = insertFile.getName();
            parentPathString = insertFile.getParent().toString();

            // REMIND --- mjc --- If we want to modify the Files table s.t. it does
            // not contain the filesystem prefix, then this would be the place to do it.

            if (!parentPathString.endsWith("/")) {
                parentPathString = parentPathString + "/";
            }
        }
        final String parentPath = parentPathString;
        final String fName = fnameString;
        final long fileId = dbQueue.execute(new SQLiteJob<Long>() {
            protected Long job(SQLiteConnection db) throws SQLiteException {
                SQLiteStatement stmt = db.prepare("INSERT into Files VALUES(null, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
                try {
                    stmt.bind(1, isDir ? "True" : "False").bind(2, crawlId).bind(3, fName)
                            .bind(4, fstatus.getOwner()).bind(5, fstatus.getGroup()).bind(6, permissions)
                            .bind(7, fstatus.getLen())
                            .bind(8, fileDateFormat.format(new Date(fstatus.getModificationTime())))
                            .bind(9, parentPath);
                    stmt.step();
                    return db.getLastInsertId();
                } finally {
                    stmt.dispose();
                }
            }
        }).complete();
    }

    ///////////////////////////////////////////////////
    // ACCESSORS FOR SCHEMAS
    ///////////////////////////////////////////////////
    /**
     * <code>getSchemaSummaries</code> returns an instance of SchemaSummary
     * for each unique schema in the database.
     */
    static String schemaInfoQuery = "SELECT schemaid FROM Schemas";

    public List<SchemaSummary> getSchemaSummaries() {
        return dbQueue.execute(new SQLiteJob<List<SchemaSummary>>() {
            protected List<SchemaSummary> job(SQLiteConnection db) throws SQLiteException {
                List<SchemaSummary> output = new ArrayList<SchemaSummary>();
                SQLiteStatement stmt = db.prepare(schemaInfoQuery);

                try {
                    while (stmt.step()) {
                        long schemaId = stmt.columnLong(0);
                        output.add(new SchemaSummary(FSAnalyzer.this, schemaId));
                    }
                } catch (SQLiteException se) {
                    se.printStackTrace();
                } finally {
                    stmt.dispose();
                }
                return output;
            }
        }).complete();
    }

    /**
     * Grab details on a schema.
     */
    public SchemaSummaryData getSchemaSummaryData(final long schemaid) {
        return dbQueue.execute(new SQLiteJob<SchemaSummaryData>() {
            protected SchemaSummaryData job(SQLiteConnection db) throws SQLiteException {
                SQLiteStatement stmt = db
                        .prepare("SELECT schemarepr, schemasrcdescription FROM Schemas WHERE schemaid = ?");
                try {
                    stmt.bind(1, schemaid);
                    if (stmt.step()) {
                        return new SchemaSummaryData(schemaid, stmt.columnString(0), stmt.columnString(1));
                    } else {
                        return null;
                    }
                } finally {
                    stmt.dispose();
                }
            }
        }).complete();
    }

    /**
     * Get a list of SchemaSummary items, one for each schema in the database.
     * Prefetch a lot of data for efficiency reasons (so we don't need to issue many small queries).
     * Otherwise enumerating schemas can be very slow
     */
    static String precachedSchemaQuery = "SELECT Schemas.schemaid, Schemas.schemarepr, Schemas.schemasrcdescription, SchemaGuesses.fid, TypeGuesses.typeid, Files.crawlid, Files.fname, Files.owner, Files.groupowner, Files.permissions, Files.size, Files.modified, Files.path FROM Schemas, SchemaGuesses, TypeGuesses, Files WHERE SchemaGuesses.schemaid = Schemas.schemaid AND TypeGuesses.fid = SchemaGuesses.fid AND Files.fid = SchemaGuesses.fid ORDER BY Schemas.schemaid";

    public List<SchemaSummary> getPrecachedSchemaSummaries() {
        return dbQueue.execute(new SQLiteJob<List<SchemaSummary>>() {
            protected List<SchemaSummary> job(SQLiteConnection db) throws SQLiteException {
                List<SchemaSummary> output = new ArrayList<SchemaSummary>();
                SQLiteStatement stmt = db.prepare(precachedSchemaQuery);

                try {
                    SchemaSummaryData ssd = null;
                    SchemaSummary ss = null;
                    long lastSchemaId = -1L;
                    List<TypeGuessSummary> tgslist = null;
                    while (stmt.step()) {
                        long schemaid = stmt.columnLong(0);
                        String schemarepr = stmt.columnString(1);
                        String schemasrcdescription = stmt.columnString(2);
                        long fid = stmt.columnLong(3);
                        long typeid = stmt.columnLong(4);
                        long crawlid = stmt.columnLong(5);
                        String fname = stmt.columnString(6);
                        String owner = stmt.columnString(7);
                        String groupowner = stmt.columnString(8);
                        String permissions = stmt.columnString(9);
                        long size = stmt.columnLong(10);
                        String modified = stmt.columnString(11);
                        String path = stmt.columnString(12);

                        TypeGuessSummary tgs = new TypeGuessSummary(FSAnalyzer.this, fid, typeid, schemaid);
                        FileSummary fs = new FileSummary(FSAnalyzer.this, fid);
                        FileSummaryData fsd = new FileSummaryData(FSAnalyzer.this, true, fid, crawlid, fname, owner,
                                groupowner, permissions, size, modified, path);
                        fs.addCachedData(fsd);
                        tgs.addCachedData(fs);

                        if (schemaid != lastSchemaId) {
                            if (ss != null) {
                                ss.addCachedData(tgslist);
                                output.add(ss);
                            }
                            ssd = new SchemaSummaryData(schemaid, schemarepr, schemasrcdescription);
                            ss = new SchemaSummary(FSAnalyzer.this, schemaid);
                            ss.addCachedData(ssd);
                            tgslist = new ArrayList<TypeGuessSummary>();
                        }
                        tgslist.add(tgs);
                        lastSchemaId = schemaid;
                    }
                    if (ss != null) {
                        ss.addCachedData(tgslist);
                        output.add(ss);
                    }
                } catch (SQLiteException se) {
                    se.printStackTrace();
                } finally {
                    stmt.dispose();
                }
                return output;
            }
        }).complete();
    }

    ///////////////////////////////////////////////////
    // ACCESSORS FOR FILES
    ///////////////////////////////////////////////////
    /**
     * <code>getFidUnderPath</code> returns the files under the given path prefix
     */
    static String subpathFilesQuery = "SELECT fid from Files WHERE path LIKE ?";

    public List<Long> getFidUnderPath(final String pathPrefix) throws SQLiteException {
        List<Long> finalResults = dbQueue.execute(new SQLiteJob<List<Long>>() {
            protected List<Long> job(SQLiteConnection db) throws SQLiteException {
                List<Long> results = new ArrayList<Long>();
                SQLiteStatement stmt = db.prepare(subpathFilesQuery);
                try {
                    stmt.bind(1, pathPrefix + "%");
                    while (stmt.step()) {
                        long resultId = stmt.columnLong(0);
                        results.add(resultId);
                    }
                    return results;
                } finally {
                    stmt.dispose();
                }
            }
        }).complete();
        return finalResults;
    }

    /**
     * <code>getFileSummaries</code> returns an instance of FileSummary
     * for each unique schema in the database.
     */
    static String fileInfoQueryWithoutPrefix = "SELECT fid FROM Files WHERE isDir = ?";
    static String fileInfoQueryWithPrefix = "SELECT fid FROM Files WHERE isDir = ? AND path = ?";

    public List<FileSummary> getFileSummariesInDir(final boolean isDir, final String prefix) {
        return dbQueue.execute(new SQLiteJob<List<FileSummary>>() {
            protected List<FileSummary> job(SQLiteConnection db) throws SQLiteException {
                List<FileSummary> output = new ArrayList<FileSummary>();
                SQLiteStatement stmt;
                if (prefix == null) {
                    stmt = db.prepare(fileInfoQueryWithoutPrefix);
                    stmt.bind(1, isDir ? "True" : "False");
                } else {
                    stmt = db.prepare(fileInfoQueryWithPrefix);
                    String prefixStr = prefix;
                    if (!prefixStr.endsWith("/")) {
                        prefixStr += "/";
                    }
                    stmt.bind(1, isDir ? "True" : "False").bind(2, prefixStr);
                }
                try {
                    while (stmt.step()) {
                        long fid = stmt.columnLong(0);
                        output.add(new FileSummary(FSAnalyzer.this, fid));
                    }
                } catch (SQLiteException se) {
                    se.printStackTrace();
                } finally {
                    stmt.dispose();
                }
                return output;
            }
        }).complete();
    }

    /**
     * A version of 'getFileSummariesInDir()' where much of the information is cached ahead of time
     */
    static String precachedFileInfoQueryWithoutPrefix = "SELECT Files.fid, Files.crawlid, Files.fname, Files.owner, Files.groupowner, Files.permissions, Files.size, Files.modified, Files.path, SchemaGuesses.schemaid, TypeGuesses.typeid FROM Files, TypeGuesses, SchemaGuesses WHERE Files.isDir = ? AND TypeGuesses.fid = Files.fid AND SchemaGuesses.fid = Files.fid";
    static String precachedFileInfoQueryWithPrefix = precachedFileInfoQueryWithoutPrefix + " AND Files.path = ?";

    public List<FileSummary> getPrecachedFileSummariesInDir(final boolean isDir, final String prefix) {
        return dbQueue.execute(new SQLiteJob<List<FileSummary>>() {
            protected List<FileSummary> job(SQLiteConnection db) throws SQLiteException {
                List<FileSummary> output = new ArrayList<FileSummary>();
                SQLiteStatement stmt;
                if (prefix == null) {
                    stmt = db.prepare(precachedFileInfoQueryWithoutPrefix);
                    stmt.bind(1, isDir ? "True" : "False");
                } else {
                    stmt = db.prepare(precachedFileInfoQueryWithPrefix);
                    String prefixStr = prefix;
                    if (!prefixStr.endsWith("/")) {
                        prefixStr += "/";
                    }
                    stmt.bind(1, isDir ? "True" : "False").bind(2, prefixStr);
                }
                try {
                    FileSummary fs = null;
                    FileSummaryData fsd = null;
                    long lastFid = -1L;
                    List<TypeGuessSummary> tgslist = null;
                    while (stmt.step()) {
                        long fid = stmt.columnLong(0);
                        long crawlid = stmt.columnLong(1);
                        String fname = stmt.columnString(2);
                        String owner = stmt.columnString(3);
                        String groupowner = stmt.columnString(4);
                        String permissions = stmt.columnString(5);
                        long size = stmt.columnLong(6);
                        String modified = stmt.columnString(7);
                        String path = stmt.columnString(8);
                        long schemaid = stmt.columnLong(9);
                        long typeid = stmt.columnLong(10);

                        // We get a tuple for every typeguess.
                        // There could be more than one typeguess for each unique file
                        TypeGuessSummary tgs = new TypeGuessSummary(FSAnalyzer.this, fid, typeid, schemaid);
                        tgs.addCachedData(fs);

                        if (fid != lastFid) {
                            if (fs != null) {
                                fs.addCachedData(tgslist);
                                output.add(fs);
                            }
                            fs = new FileSummary(FSAnalyzer.this, fid);
                            fsd = new FileSummaryData(FSAnalyzer.this, isDir, fid, crawlid, fname, owner,
                                    groupowner, permissions, size, modified, path);
                            fs.addCachedData(fsd);
                            tgslist = new ArrayList<TypeGuessSummary>();
                        }
                        tgslist.add(tgs);
                        lastFid = fid;
                    }
                    if (fs != null) {
                        fs.addCachedData(tgslist);
                        output.add(fs);
                    }
                } catch (SQLiteException sqe) {
                    sqe.printStackTrace();
                } finally {
                    stmt.dispose();
                }
                return output;
            }
        }).complete();
    }

    static String singletonFileInfoQuery = "SELECT fid FROM Files WHERE path||fname = ?";

    public FileSummary getSingleFileSummary(final String fullName) {
        return dbQueue.execute(new SQLiteJob<FileSummary>() {
            protected FileSummary job(SQLiteConnection db) throws SQLiteException {
                SQLiteStatement stmt = db.prepare(singletonFileInfoQuery);
                stmt.bind(1, fullName);
                try {
                    if (stmt.step()) {
                        long fid = stmt.columnLong(0);
                        return new FileSummary(FSAnalyzer.this, fid);
                    }
                } catch (SQLiteException se) {
                    se.printStackTrace();
                } finally {
                    stmt.dispose();
                }
                return null;
            }
        }).complete();
    }

    /**
     * <code>getFilesForCrawl()</code> returns all the seen files for a given crawlid
     */
    public List<Path> getFilesForCrawl(final long crawlid) {
        return getFileEntriesForCrawl(crawlid, "False");
    }

    public List<Path> getDirsForCrawl(final long crawlid) {
        return getFileEntriesForCrawl(crawlid, "True");
    }

    static String filenameForCrawlQuery = "SELECT path, fname FROM Files WHERE crawlid=? AND isDir = ?";

    private List<Path> getFileEntriesForCrawl(final long crawlid, final String isDir) {
        return dbQueue.execute(new SQLiteJob<List<Path>>() {
            protected List<Path> job(SQLiteConnection db) throws SQLiteException {
                List<Path> output = new ArrayList<Path>();
                SQLiteStatement stmt = db.prepare(filenameForCrawlQuery);
                try {
                    stmt.bind(1, crawlid).bind(2, isDir);
                    while (stmt.step()) {
                        output.add(new Path(stmt.columnString(0), stmt.columnString(1)));
                    }
                } catch (SQLiteException se) {
                    se.printStackTrace();
                } finally {
                    stmt.dispose();
                }
                return output;
            }
        }).complete();
    }

    /**
     * Grab details on a specific file.
     */
    public DataDescriptor getDataDescriptor(final long fid) {
        final FileSystem fs = getFS();
        return dbQueue.execute(new SQLiteJob<DataDescriptor>() {
            protected DataDescriptor job(SQLiteConnection db) throws SQLiteException {
                String identifier = null;
                String path = null;
                String fname = null;
                SQLiteStatement stmt = db.prepare(
                        "SELECT Types.typelabel, Files.path, Files.fname FROM Types, TypeGuesses, Files WHERE TypeGuesses.fid = ? AND Files.fid = TypeGuesses.fid AND Types.typeid = TypeGuesses.typeid");
                try {
                    stmt.bind(1, fid);
                    if (stmt.step()) {
                        identifier = stmt.columnString(0);
                        path = stmt.columnString(1);
                        fname = stmt.columnString(2);
                    }
                } finally {
                    stmt.dispose();
                }
                stmt = db.prepare(
                        "SELECT Schemas.schemaid, Schemas.schemarepr, Schemas.schemasrcdescription, Schemas.schemapayload FROM Schemas, SchemaGuesses WHERE SchemaGuesses.fid = ? AND SchemaGuesses.schemaid = Schemas.schemaid");
                try {
                    List<String> schemaReprs = new ArrayList<String>();
                    List<String> schemaDescs = new ArrayList<String>();
                    List<byte[]> schemaBlobs = new ArrayList<byte[]>();

                    stmt.bind(1, fid);
                    while (stmt.step()) {
                        schemaReprs.add(stmt.columnString(1));
                        schemaDescs.add(stmt.columnString(2));
                        schemaBlobs.add(stmt.columnBlob(3));
                    }

                    try {
                        return formatAnalyzer.loadDataDescriptor(fs, new Path(path + fname), identifier,
                                schemaReprs, schemaDescs, schemaBlobs);
                    } catch (IOException ioex) {
                        return null;
                    }
                } finally {
                    stmt.dispose();
                }
            }
        }).complete();
    }

    public FileSummaryData getFileSummaryData(final long fid) {
        final FileSystem fs = getFS();
        return dbQueue.execute(new SQLiteJob<FileSummaryData>() {
            protected FileSummaryData job(SQLiteConnection db) throws SQLiteException {
                FileSummaryData fsd = null;
                boolean isDir = false;
                long crawlid = 0L;
                String fname = null;
                String owner = null;
                String groupowner = null;
                String permissions = null;
                long size = 0L;
                String modified = null;
                String path = null;
                String identifier = null;

                SQLiteStatement stmt = db.prepare(
                        "SELECT isDir, crawlid, fname, owner, groupowner, permissions, size, modified, path FROM Files WHERE Files.fid = ?");
                try {
                    stmt.bind(1, fid);
                    if (stmt.step()) {
                        isDir = "True".equals(stmt.columnString(0));
                        crawlid = stmt.columnLong(1);
                        fname = stmt.columnString(2);
                        owner = stmt.columnString(3);
                        groupowner = stmt.columnString(4);
                        permissions = stmt.columnString(5);
                        size = stmt.columnLong(6);
                        modified = stmt.columnString(7);
                        path = stmt.columnString(8);
                    }
                } finally {
                    stmt.dispose();
                }

                if (!isDir) {
                    stmt = db.prepare(
                            "SELECT typelabel FROM Types, TypeGuesses WHERE TypeGuesses.fid = ? AND Types.typeid = TypeGuesses.typeid");
                    try {
                        stmt.bind(1, fid);
                        if (stmt.step()) {
                            identifier = stmt.columnString(0);
                        }
                    } finally {
                        stmt.dispose();
                    }

                    stmt = db.prepare(
                            "SELECT Schemas.schemaid, Schemas.schemarepr, Schemas.schemasrcdescription, Schemas.schemapayload FROM Schemas, SchemaGuesses WHERE SchemaGuesses.fid = ? AND SchemaGuesses.schemaid = Schemas.schemaid");
                    try {
                        List<String> schemaReprs = new ArrayList<String>();
                        List<String> schemaDescs = new ArrayList<String>();
                        List<byte[]> schemaBlobs = new ArrayList<byte[]>();

                        stmt.bind(1, fid);
                        while (stmt.step()) {
                            schemaReprs.add(stmt.columnString(1));
                            schemaDescs.add(stmt.columnString(2));
                            schemaBlobs.add(stmt.columnBlob(3));
                        }

                        try {
                            DataDescriptor dd = formatAnalyzer.loadDataDescriptor(fs, new Path(path + fname),
                                    identifier, schemaReprs, schemaDescs, schemaBlobs);
                            fsd = new FileSummaryData(FSAnalyzer.this, true, fid, crawlid, fname, owner, groupowner,
                                    permissions, size, modified, path);
                            fsd.addCachedData(dd);
                        } catch (IOException iex) {
                            iex.printStackTrace();
                            return null;
                        }
                    } finally {
                        stmt.dispose();
                    }
                } else {
                    fsd = new FileSummaryData(FSAnalyzer.this, false, fid, crawlid, fname, owner, groupowner,
                            permissions, size, modified, path);
                }
                return fsd;
            }
        }).complete();
    }

    /**
     * Get the top-level directory from a given crawl
     */
    public Path getTopDir(final long crawlid) {
        return dbQueue.execute(new SQLiteJob<Path>() {
            protected Path job(SQLiteConnection db) throws SQLiteException {
                SQLiteStatement stmt = db.prepare(
                        "SELECT path, fname FROM Files WHERE crawlid = ? AND isDir = 'True' ORDER BY length(path||fname) ASC LIMIT 1");
                try {
                    stmt.bind(1, crawlid);
                    if (stmt.step()) {
                        return new Path(stmt.columnString(0) + stmt.columnString(1));
                    } else {
                        return null;
                    }
                } finally {
                    stmt.dispose();
                }
            }
        }).complete();
    }

    /**
     * Get the parents for the given directory from a given crawl
     */
    public List<FileSummary> getDirParents(final long crawlid, final String targetDirStr) {
        return dbQueue.execute(new SQLiteJob<List<FileSummary>>() {
            protected List<FileSummary> job(SQLiteConnection db) throws SQLiteException {
                List<FileSummary> output = new ArrayList<FileSummary>();
                SQLiteStatement stmt = db.prepare(
                        "select fid, path, fname from Files WHERE crawlid = ? AND length(?) > length(path||fname) AND isDir = 'True' AND replace(?, path||fname, '') LIKE '/%'");
                try {
                    Path targetDir = new Path(targetDirStr);
                    if (targetDir.getParent() != null) {
                        stmt.bind(1, crawlid).bind(2, targetDir.toString()).bind(3, targetDir.toString());
                        while (stmt.step()) {
                            //Path p = new Path(stmt.columnString(0) + stmt.columnString(1));
                            output.add(new FileSummary(FSAnalyzer.this, stmt.columnLong(0)));
                        }
                    }
                } finally {
                    stmt.dispose();
                }
                return output;
            }
        }).complete();
    }

    /**
     * Get the childiren dirs for the given directory from a given crawl
     */
    public List<FileSummary> getDirChildren(final long crawlid, final String targetDir) {
        return dbQueue.execute(new SQLiteJob<List<FileSummary>>() {
            protected List<FileSummary> job(SQLiteConnection db) throws SQLiteException {
                List<FileSummary> output = new ArrayList<FileSummary>();
                SQLiteStatement stmt = db.prepare(
                        "SELECT DISTINCT fid AS fullpath FROM Files WHERE isDir = 'True' AND crawlid = ? AND path = ? ORDER BY fname ASC");
                try {
                    String targetDirNormalizedStr = targetDir;
                    if (!targetDirNormalizedStr.endsWith("/")) {
                        targetDirNormalizedStr += "/";
                    }
                    stmt.bind(1, crawlid).bind(2, targetDirNormalizedStr);
                    while (stmt.step()) {
                        output.add(new FileSummary(FSAnalyzer.this, stmt.columnLong(0)));
                    }
                } finally {
                    stmt.dispose();
                }
                return output;
            }
        }).complete();
    }

    public InputStream getRawBytes(Path p) throws IOException {
        return getFS().open(p);
    }

    ///////////////////////////////////////////////////
    // ACCESSORS FOR CRAWLS
    ///////////////////////////////////////////////////
    /**
     * <code>getCrawlSummaries</code> returns a list of the historical crawl info
     */
    static String crawlInfoQuery = "SELECT crawlid, crawlstarted, crawlfinished, inprogress, fsid FROM Crawls";

    public List<CrawlSummary> getCrawlSummaries() {
        return dbQueue.execute(new SQLiteJob<List<CrawlSummary>>() {
            protected List<CrawlSummary> job(SQLiteConnection db) throws SQLiteException {
                List<CrawlSummary> output = new ArrayList<CrawlSummary>();
                SQLiteStatement stmt = db.prepare(crawlInfoQuery);
                try {
                    while (stmt.step()) {
                        long cid = stmt.columnLong(0);
                        String started = stmt.columnString(1);
                        String finished = stmt.columnString(2);
                        String inprogress = stmt.columnString(3);
                        long fsid = stmt.columnLong(4);
                        output.add(new CrawlSummary(FSAnalyzer.this, cid, started, finished,
                                "True".equals(inprogress), fsid));
                    }
                } catch (SQLiteException se) {
                    se.printStackTrace();
                } finally {
                    stmt.dispose();
                }
                return output;
            }
        }).complete();
    }

    /**
     * Grab details on a crawl.
     */
    public CrawlSummary getCrawlSummaryData(final long crawlid) {
        return dbQueue.execute(new SQLiteJob<CrawlSummary>() {
            protected CrawlSummary job(SQLiteConnection db) throws SQLiteException {
                SQLiteStatement stmt = db.prepare(
                        "SELECT crawlstarted, crawlfinished, inprogress, fsid FROM Crawls WHERE crawlid = ?");
                try {
                    stmt.bind(1, crawlid);
                    if (stmt.step()) {
                        return new CrawlSummary(FSAnalyzer.this, crawlid, stmt.columnString(0),
                                stmt.columnString(1), "True".equals(stmt.columnString(2)), stmt.columnLong(3));
                    } else {
                        return null;
                    }
                } finally {
                    stmt.dispose();
                }
            }
        }).complete();
    }

    ///////////////////////////////////////////////////
    // ACCESSORS FOR TYPES
    ///////////////////////////////////////////////////
    /**
     * <code>getTypeSummaries</code> returns an instance of TypeSummary
     * for each unique type in the database.
     */
    static String typeInfoQuery = "SELECT typeid FROM Types";

    public List<TypeSummary> getTypeSummaries() {
        return dbQueue.execute(new SQLiteJob<List<TypeSummary>>() {
            protected List<TypeSummary> job(SQLiteConnection db) throws SQLiteException {
                List<TypeSummary> output = new ArrayList<TypeSummary>();
                SQLiteStatement stmt = db.prepare(typeInfoQuery);

                try {
                    while (stmt.step()) {
                        long typeid = stmt.columnLong(0);
                        output.add(new TypeSummary(FSAnalyzer.this, typeid));
                    }
                } catch (SQLiteException se) {
                    se.printStackTrace();
                } finally {
                    stmt.dispose();
                }
                return output;
            }
        }).complete();
    }

    /**
     * Grab details on a type.
     */
    public TypeSummaryData getTypeSummaryData(final long typeid) {
        return dbQueue.execute(new SQLiteJob<TypeSummaryData>() {
            protected TypeSummaryData job(SQLiteConnection db) throws SQLiteException {
                SQLiteStatement stmt = db.prepare("SELECT typelabel FROM Types WHERE typeid = ?");
                try {
                    stmt.bind(1, typeid);
                    if (stmt.step()) {
                        return new TypeSummaryData(typeid, stmt.columnString(0));
                    } else {
                        return null;
                    }
                } finally {
                    stmt.dispose();
                }
            }
        }).complete();
    }

    ///////////////////////////////////////////
    // ACCESSORS FOR CONFIG INFO
    ///////////////////////////////////////////
    /**
     * Read a property's value
     */
    public String getConfigProperty(final String propertyName) {
        return dbQueue.execute(new SQLiteJob<String>() {
            protected String job(SQLiteConnection db) throws SQLiteException {
                SQLiteStatement stmt = db.prepare("SELECT property FROM Configs WHERE propertyname=?");
                try {
                    stmt.bind(1, propertyName);
                    if (stmt.step()) {
                        return stmt.columnString(0);
                    } else {
                        return null;
                    }
                } finally {
                    stmt.dispose();
                }
            }
        }).complete();
    }

    /**
     * Write a property
     */
    public void setConfigProperty(final String propertyName, final String property) {
        if (property == null) {
            deleteConfigProperty(propertyName);
        } else {
            dbQueue.execute(new SQLiteJob<Object>() {
                protected Object job(SQLiteConnection db) throws SQLiteException {
                    SQLiteStatement stmt = db.prepare("REPLACE into Configs VALUES(?, ?)");
                    try {
                        stmt.bind(1, propertyName);
                        stmt.bind(2, property);
                        stmt.step();
                    } finally {
                        stmt.dispose();
                    }
                    return null;
                }
            }).complete();
        }
    }

    /**
     * Delete a property
     */
    public void deleteConfigProperty(final String propertyName) {
        dbQueue.execute(new SQLiteJob<Object>() {
            protected Object job(SQLiteConnection db) throws SQLiteException {
                SQLiteStatement stmt = db.prepare("DELETE from Configs WHERE propertyname=?");
                try {
                    stmt.bind(1, propertyName);
                    stmt.step();
                } finally {
                    stmt.dispose();
                }
                return null;
            }
        }).complete();
    }

    ///////////////////////////////////////////
    // Get type guesses
    ///////////////////////////////////////////
    static String typeGuessQueryForFile = "SELECT SchemaGuesses.fid, TypeGuesses.typeid, SchemaGuesses.schemaid FROM TypeGuesses, SchemaGuesses WHERE TypeGuesses.fid = ? AND TypeGuesses.fid = SchemaGuesses.fid";
    static String typeGuessQueryForSchema = "SELECT SchemaGuesses.fid, TypeGuesses.typeid, SchemaGuesses.schemaid FROM TypeGuesses, SchemaGuesses WHERE SchemaGuesses.schemaid = ? AND TypeGuesses.fid = SchemaGuesses.fid";
    static String typeGuessQueryForType = "SELECT SchemaGuesses.fid, TypeGuesses.typeid, SchemaGuesses.schemaid FROM TypeGuesses, SchemaGuesses WHERE TypeGuesses.typeid = ? AND TypeGuesses.fid = SchemaGuesses.fid";

    public List<TypeGuessSummary> getTypeGuessesForFile(final long fid) {
        return getTypeGuesses(typeGuessQueryForFile, fid);
    }

    public List<TypeGuessSummary> getTypeGuessesForSchema(final long schemaid) {
        return getTypeGuesses(typeGuessQueryForSchema, schemaid);
    }

    static String countFilesQueryForSchema = "SELECT COUNT(DISTINCT fid) FROM SchemaGuesses WHERE schemaid = ?";

    public long countFilesForSchema(final long schemaid) {
        return dbQueue.execute(new SQLiteJob<Long>() {
            protected Long job(SQLiteConnection db) throws SQLiteException {
                SQLiteStatement stmt = db.prepare(countFilesQueryForSchema);
                try {
                    stmt.bind(1, schemaid);
                    if (stmt.step()) {
                        return stmt.columnLong(0);
                    }
                } finally {
                    stmt.dispose();
                }
                return -1L;
            }
        }).complete();
    }

    public List<TypeGuessSummary> getTypeGuessesForType(final long typeid) {
        return getTypeGuesses(typeGuessQueryForType, typeid);
    }

    List<TypeGuessSummary> getTypeGuesses(final String queryStr, final long idval) {
        return dbQueue.execute(new SQLiteJob<List<TypeGuessSummary>>() {
            protected List<TypeGuessSummary> job(SQLiteConnection db) throws SQLiteException {
                List<TypeGuessSummary> outputList = new ArrayList<TypeGuessSummary>();
                SQLiteStatement stmt = db.prepare(queryStr);
                try {
                    stmt.bind(1, idval);
                    while (stmt.step()) {
                        outputList.add(new TypeGuessSummary(FSAnalyzer.this, stmt.columnLong(0), stmt.columnLong(1),
                                stmt.columnLong(2)));
                    }
                } finally {
                    stmt.dispose();
                }
                return outputList;
            }
        }).complete();
    }

    static String precachedTypeSummaryQuery = "SELECT TypeGuesses.fid, Types.typelabel, SchemaGuesses.schemaid, Files.crawlid, Files.fname, Files.owner, Files.groupowner, Files.permissions, Files.size, Files.modified, Files.path FROM SchemaGuesses, TypeGuesses, Files, Types WHERE TypeGuesses.fid = SchemaGuesses.fid AND TypeGuesses.fid = Files.fid AND TypeGuesses.typeid = Types.typeid AND TypeGuesses.typeId = ?";

    public TypeSummary getPrecachedTypeSummary(final long typeid) {
        return dbQueue.execute(new SQLiteJob<TypeSummary>() {
            protected TypeSummary job(SQLiteConnection db) throws SQLiteException {
                SQLiteStatement stmt = db.prepare(precachedTypeSummaryQuery);
                stmt.bind(1, typeid);

                TypeSummary ts = null;
                try {
                    List<TypeGuessSummary> tgslist = null;
                    while (stmt.step()) {
                        int i = 0;
                        long fid = stmt.columnLong(i++);
                        String typelabel = stmt.columnString(i++);
                        long schemaid = stmt.columnLong(i++);
                        long crawlid = stmt.columnLong(i++);
                        String fname = stmt.columnString(i++);
                        String owner = stmt.columnString(i++);
                        String groupowner = stmt.columnString(i++);
                        String permissions = stmt.columnString(i++);
                        long size = stmt.columnLong(i++);
                        String modified = stmt.columnString(i++);
                        String path = stmt.columnString(i++);

                        if (ts == null) {
                            ts = new TypeSummary(FSAnalyzer.this, typeid);
                            ts.addCachedData(new TypeSummaryData(typeid, typelabel));
                            tgslist = new ArrayList<TypeGuessSummary>();
                        }
                        TypeGuessSummary tg = new TypeGuessSummary(FSAnalyzer.this, fid, typeid, schemaid);
                        FileSummary fs = new FileSummary(FSAnalyzer.this, fid);
                        FileSummaryData fsd = new FileSummaryData(FSAnalyzer.this, true, fid, crawlid, fname, owner,
                                groupowner, permissions, size, modified, path);
                        fs.addCachedData(fsd);
                        tg.addCachedData(fs);
                        tgslist.add(tg);
                    }
                    ts.addCachedData(tgslist);
                } catch (SQLiteException sle) {
                    sle.printStackTrace();
                } finally {
                    stmt.dispose();
                }
                return ts;
            }
        }).complete();
    }

    ////////////////////////////////////////
    // Initialize and close an instance of FSAnalyzer
    ////////////////////////////////////////
    SQLiteConnection db;
    SQLiteQueue dbQueue;
    FormatAnalyzer formatAnalyzer;

    /**
     * Inits (and optionally creates) a new <code>FSAnalyzer</code> instance.
     */
    public FSAnalyzer(File metadataStore, File schemaDir) throws IOException, SQLiteException {
        boolean isNew = false;
        metadataStore = metadataStore.getCanonicalFile();
        if (!metadataStore.exists()) {
            isNew = true;
        }
        this.dbQueue = new SQLiteQueue(metadataStore);
        this.dbQueue.start();

        if (isNew) {
            createTables();
        }
        this.formatAnalyzer = new FormatAnalyzer(schemaDir);
        FSAnalyzer.fsaInstance = this;
    }

    public void close() throws IOException, SQLiteException, InterruptedException {
        this.dbQueue.stop(true).join();
    }
}